Example #1
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (input_height, input_width).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)

        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.4] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
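A minimal standalone sketch of the matching rule above, assuming a precomputed [#anchors, #obj] IoU matrix (the values below are made up purely for illustration):

import torch

ious = torch.tensor([[0.72, 0.10],
                     [0.45, 0.30],
                     [0.20, 0.05]])   # 3 anchors, 2 objects
labels = torch.tensor([3, 7])         # class indices of the 2 objects

max_ious, max_ids = ious.max(1)       # best object for each anchor
cls_targets = 1 + labels[max_ids]     # shift labels so 0 means background
cls_targets[max_ious < 0.4] = 0       # background anchors
cls_targets[(max_ious > 0.4) & (max_ious < 0.5)] = -1   # ignored band
print(cls_targets)                    # tensor([ 4, -1,  0])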
Example #2
    def encode(self, boxes, labels, input_size):
        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)

        anchor_boxes = self.get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')
        boxes = boxes.float()
        ious = box_iou(anchor_boxes, boxes, order='xywh')
        # ious: IoU matrix between each anchor box and every ground-truth box; rows: anchors, columns: scores
        # max_ids: index of the best-matching ground-truth box for each anchor; max_ious: the corresponding IoU score
        max_ious, max_ids = ious.max(1)
        # pick the matched ground-truth box for each anchor
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)

        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.1] = 0
        cls_targets[(max_ious >= 0.1) & (max_ious < 0.3)] = -1
        return loc_targets, cls_targets
Example #3
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax) in range [0,1], sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int) model input size.

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#total_anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#total_anchors].
        '''
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')
        boxes = boxes * input_size  # scale to range [0,input_size]

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.4] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
Example #4
    def encode(self, boxes, labels, input_size):

        """We obey the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)
        args:
        boxes:Tensor(xmin, ymin, xmax, ymax) size(boxes_num, 4)
        labels:Tensor size(boxes_num,)
        return:
        target_cls:Tensor(anchor_num,)
        target_loc:Tensor(anchor_num, 4)
        """

        anchor_boxes = self._get_anchor_boxes(input_size)  # [anchor_num, 4]
        boxes = utils.change_box_order(boxes, 'xyxy2xywh')
        ious = utils.box_iou(anchor_boxes, boxes, order='xywh')  # [anchor_num, boxes_num]
        max_ious, max_ids = ious.max(1)  # (anchor_num,)
        boxes = boxes[max_ids]  # (anchor_num, 4), groundtruth
        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        target_loc = torch.cat([loc_xy, loc_wh], 1)
        target_cls = labels[max_ids]
        target_cls[max_ious < 0.5] = 0
        ignore = (max_ious < 0.5) & (max_ious >= 0.4)
        target_cls[ignore] = -1

        return target_loc, target_cls
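For reference, a hedged sketch of the inverse transform implied by the box coder above (decode_loc is an illustrative name, not part of the original class):

import torch

def decode_loc(loc, anchors_xywh):
    """loc: [N,4] (tx,ty,tw,th); anchors_xywh: [N,4] (x,y,w,h)."""
    xy = loc[:, :2] * anchors_xywh[:, 2:] + anchors_xywh[:, :2]   # x = tx*anchor_w + anchor_x
    wh = torch.exp(loc[:, 2:]) * anchors_xywh[:, 2:]              # w = exp(tw)*anchor_w
    return torch.cat([xy, wh], 1)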
Example #5
    def estimateTrackedBbox(self, z):
        """
        Handles all kinds of estimation: 
            unmatched detections (new detection)
            unmatched tracking (no confident detections)
            matched detection (general tracking)
        """
        z = np.expand_dims(z, axis=0).T
        if self.box == [] or self.x_state == []:
            # unmatched detection
            self.x_state = np.array([[z[0], 0, z[1], 0, z[2], 0, z[3], 0]]).T
            self.predict_only()
            self.missed_dets = 0
        else:
            iou = box_iou(self.box, z)
            if iou > self.iou_thr:
                # matched tracking
                self.kalman_filter(z)
                self.missed_dets = 0
            else:
                # unmatched tracking
                if self.missed_dets > self.max_age:
                    return False
                self.missed_dets += 1
                self.predict_only()

        xx = self.x_state.T[0].tolist()
        self.box = [xx[0], xx[2], xx[4], xx[6]]
        return True
Example #6
def find_best_pred(gt_boxes, pred_boxes):
    '''
    Find whether there is a predicted box for each ground-truth box

    Args:
      gt_boxes: (FloatTensor) [N, 6]  zyxzyx
      pred_boxes: (FloatTensor) [M, 6]   zyxzyx
    
    Returns:
      count: (ndarray) (tp, fn, fp)
    '''
    tp = 0
    fn = 0
    fp = 0
    distance = box_distance(gt_boxes, pred_boxes)
    iou = box_iou(gt_boxes, pred_boxes)
    min_dists, min_ids = distance.min(1)
    best_ious, best_ids = iou.max(0)  # best-matching gt IoU for each prediction
    gt_boxes = change_box_order(gt_boxes, order="zyxzyx2zyxdhw")
    for i in range(gt_boxes.size(0)):
        gt = gt_boxes[i, :]
        diameter = math.sqrt(gt[3]**2 + gt[4]**2 + gt[5]**2)
        radius = diameter / 2 + 10.
        if min_dists[i] <= radius:
            tp += 1
        else:
            fn += 1
    fp = pred_boxes.size(0) - tp

    return np.array([tp, fn, fp]), best_ious
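The (tp, fn, fp) counts returned above translate directly into recall and precision; a small illustrative computation (numbers are made up):

tp, fn, fp = 8, 2, 3
recall = tp / (tp + fn)       # 8 / 10 = 0.8
precision = tp / (tp + fp)    # 8 / 11 ≈ 0.73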
Example #7
 def loop_body(b, ignore_mask):
     true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                object_mask_bool[b, ..., 0])
     iou = box_iou(pred_box[b], true_box)
     best_iou = K.max(iou, axis=-1)
     ignore_mask = ignore_mask.write(
         b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
     return b + 1, ignore_mask
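A self-contained sketch of the tf.while_loop/TensorArray pattern this loop_body is written for, with toy tensors standing in for the per-image IoUs (the real loss wires in y_true, pred_box and object_mask_bool instead):

import tensorflow as tf

batch = 3
ious = tf.constant([[0.2, 0.6], [0.8, 0.1], [0.3, 0.4]])   # stand-in IoUs per image
ignore_thresh = 0.5

def body(b, mask_array):
    best_iou = tf.reduce_max(ious[b])                       # best IoU for image b
    mask_array = mask_array.write(b, tf.cast(best_iou < ignore_thresh, tf.float32))
    return b + 1, mask_array

mask_array = tf.TensorArray(tf.float32, size=1, dynamic_size=True)
_, mask_array = tf.while_loop(lambda b, *_: b < batch, body, [0, mask_array])
ignore_mask = mask_array.stack()                            # shape (3,)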
Example #8
 def loop_body(b, ignore_mask):
     true_box = tf.boolean_mask(y_true[i][b, ..., 0:4],
                                object_mask_bool[b, ..., 0])
     iou = box_iou(pred_box[b], true_box)
     best_iou = tf.keras.backend.max(iou, axis=-1)
     ignore_mask = ignore_mask.write(
         b, tf.cast(best_iou < ignore_thresh, true_box.dtype))
     return b + 1, ignore_mask
Example #9
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels into YOLOv2 format.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax) in range [0,1], sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int) model input size.

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [5,4,fmsize,fmsize].
          cls_targets: (tensor) encoded class labels, sized [5,20,fmsize,fmsize].
          box_targets: (tensor) truth boxes, sized [#obj,4].
        '''
        num_boxes = len(boxes)
        # input_size -> fmsize
        # 320->10, 352->11, 384->12, 416->13, ..., 608->19
        fmsize = (input_size - 320) // 32 + 10
        grid_size = input_size / fmsize

        boxes *= input_size  # scale [0,1] -> [0,input_size]
        bx = (boxes[:, 0] + boxes[:, 2]) * 0.5 / grid_size  # in [0,fmsize]
        by = (boxes[:, 1] + boxes[:, 3]) * 0.5 / grid_size  # in [0,fmsize]
        bw = (boxes[:, 2] - boxes[:, 0]) / grid_size  # in [0,fmsize]
        bh = (boxes[:, 3] - boxes[:, 1]) / grid_size  # in [0,fmsize]

        tx = bx - bx.floor()
        ty = by - by.floor()

        xy = meshgrid(fmsize,
                      swap_dims=True) + 0.5  # grid center, [fmsize*fmsize,2]
        wh = torch.Tensor(self.anchors)  # [5,2]

        xy = xy.view(fmsize, fmsize, 1, 2).expand(fmsize, fmsize, 5, 2)
        wh = wh.view(1, 1, 5, 2).expand(fmsize, fmsize, 5, 2)
        anchor_boxes = torch.cat([xy - wh / 2, xy + wh / 2],
                                 3)  # [fmsize,fmsize,5,4]

        ious = box_iou(anchor_boxes.view(-1, 4),
                       boxes / grid_size)  # [fmsize*fmsize*5,N]
        ious = ious.view(fmsize, fmsize, 5, num_boxes)  # [fmsize,fmsize,5,N]

        loc_targets = torch.zeros(5, 4, fmsize, fmsize)  # 5boxes * 4coords
        cls_targets = torch.zeros(5, 20, fmsize, fmsize)
        for i in range(num_boxes):
            cx = int(bx[i])
            cy = int(by[i])
            _, max_idx = ious[cy, cx, :, i].max(0)
            j = max_idx[0]
            cls_targets[j, labels[i], cy, cx] = 1

            tw = bw[i] / self.anchors[j][0]
            th = bh[i] / self.anchors[j][1]
            loc_targets[j, :, cy, cx] = torch.Tensor([tx[i], ty[i], tw, th])
        return loc_targets, cls_targets, boxes / grid_size
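A quick check of the input_size -> fmsize mapping noted in the comment above (integer arithmetic only):

for s in (320, 352, 384, 416, 608):
    print(s, (s - 320) // 32 + 10)   # 320->10, 352->11, 384->12, 416->13, 608->19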
Example #10
    def encode(self, gt_quad_boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        TextBoxes++ quad_box encoder:
          tx_n = (x_n - anchor_x) / anchor_w
          ty_n = (y_n - anchor_y) / anchor_h

        Args:
          gt_quad_boxes: (tensor) bounding boxes of (xyxyxyxy), sized [#obj, 8].
          labels: (tensor) object class labels, sized [#obj, ].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,8].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''

        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
            else torch.Tensor(input_size)

        anchor_rect_boxes = self._get_anchor_boxes(
            input_size)  # (num_anchor, 4)
        anchor_quad_boxes = change_box_order(anchor_rect_boxes,
                                             "xywh2quad")  # (num_anchor, 8)

        gt_rect_boxes = change_box_order(gt_quad_boxes, "quad2xyxy")

        ious = box_iou(anchor_rect_boxes, gt_rect_boxes)
        max_ious, max_ids = ious.max(1)

        # Each anchor box is matched to the gt box with the largest IoU
        gt_quad_boxes = gt_quad_boxes[max_ids]  # (num_anchor, 8)
        gt_rect_boxes = gt_rect_boxes[max_ids]  # (num_anchor, 4)

        # for Rectangle boxes -> using in TextBoxes
        #gt_rect_boxes = change_box_order(gt_rect_boxes, "xyxy2xywh")
        #loc_rect_yx = (gt_rect_boxes[:, :2] - anchor_rect_boxes[:, :2]) / anchor_rect_boxes[:, 2:]
        #loc_rect_hw = torch.log(gt_rect_boxes[:, 2:] / anchor_rect_boxes[:, 2:])

        # for Quad boxes -> using in TextBoxes++
        anchor_boxes_hw = anchor_rect_boxes[:, 2:4].repeat(1, 4)
        loc_quad_yx = (gt_quad_boxes - anchor_quad_boxes) / anchor_boxes_hw

        # loc_targets = torch.cat([loc_rect_yx, loc_rect_hw, loc_quad_yx], dim=1) # (num_anchor, 12)
        loc_targets = loc_quad_yx
        cls_targets = labels[max_ids]

        cls_targets[max_ious < 0.5] = -1  # ignore (0.4~0.5) : -1
        cls_targets[max_ious < 0.4] = 0  # background (0.0~0.4): 0
        # positive (0.5~1.0) : 1
        return loc_targets, cls_targets
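A hedged sketch of the matching decode step for the quad offsets above (decode_quad is an illustrative name; anchor_quad holds the corner coordinates of each anchor and anchor_wh its width/height):

import torch

def decode_quad(loc_quad, anchor_quad, anchor_wh):
    """loc_quad, anchor_quad: [N,8]; anchor_wh: [N,2] (w,h)."""
    return loc_quad * anchor_wh.repeat(1, 4) + anchor_quad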
Example #11
 def encode(self, labels, boxes, input_size=None, test=False):
     '''
     Encode the objects parsed from an image's XML annotation into the
     bounding-box regression format:
     tx = (x - anchor_x) / anchor_w
     ty = (y - anchor_y) / anchor_h
     tw = log(w / anchor_w)
     th = log(h / anchor_h)
     Note that this method takes the objects of a single image, so it must be
         called one image at a time.
     args:
         labels: tensor, the label of each ground-truth box, sized [#box,]
         boxes: tensor, ground truth bounding boxes,
             (xmin, ymin, xmax, ymax), sized [#box, 4]
         input_size: int/tuple, size of the input image
         test: used at test time
     returns:
         cls_targets: tensor, the label assigned to each anchor, sized
             [#anchors,], where 0 is the background class, 1-k are the k object
             classes and -1 marks ignored anchors
         loc_targets: tensor, the bounding-box regression target assigned to
             each anchor, sized [#anchors, 4]; #anchors is the total number of
             anchors over all feature maps
     '''
     if input_size is None:
         input_size = self.input_size
         anchor_boxes = self.anchor_boxes
     else:
         if len(input_size) != 2:
             raise ValueError("TCT's input_size is not 1920x1200, so it cannot be None")
         input_size = torch.tensor(input_size, dtype=torch.float)
         anchor_boxes = self._get_anchor_boxes(input_size)
     boxes = change_box_order(boxes, 'xyxy2xywh')
     # compute the IoU between every anchor and every ground-truth box and use it to assign labels
     ious = box_iou(anchor_boxes, boxes, order='xywh')
     max_ious, max_ids = ious.max(1)
     boxes = boxes[max_ids]
     if test:
         _, orders = max_ious.sort(0, True)
         loc_targets = change_box_order(anchor_boxes, 'xywh2xyxy')[orders]
     else:
         # compute the bounding-box regression offsets, i.e. the regression targets
         loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
         loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
         loc_targets = torch.cat([loc_xy, loc_wh], 1)
     cls_targets = 1 + labels[max_ids]  # add 1 so that 0 is reserved for the background class
     # assign the background class and mark the ignored anchors
     cls_targets[max_ious < self.iou_thre] = 0
     ignore = (max_ious > self.ignore_thres[0]) & \
         (max_ious < self.ignore_thres[1])
     cls_targets[ignore] = -1  # these anchors are not used
     if test:
         cls_targets = cls_targets[orders]
     return cls_targets, loc_targets
Example #12
    def random_crop(self, img, boxes, labels):
        '''Randomly crop the image and adjust the bbox locations.

        Args:
          img: (PIL.Image) image.
          boxes: (tensor) bbox locations, sized [#obj, 4].
          labels: (tensor) bbox labels, sized [#obj,].

        Returns:
          img: (PIL.Image) cropped image.
          selected_boxes: (tensor) selected bbox locations.
          labels: (tensor) selected bbox labels.
        '''
        imw, imh = img.size
        while True:
            min_iou = random.choice([None, 0.1, 0.3, 0.5, 0.7, 0.9])
            if min_iou is None:
                return img, boxes, labels

            for _ in range(100):
                w = random.randrange(int(0.1 * imw), imw)
                h = random.randrange(int(0.1 * imh), imh)

                if h > 2 * w or w > 2 * h:
                    continue

                x = random.randrange(imw - w)
                y = random.randrange(imh - h)
                roi = torch.Tensor([[x, y, x + w, y + h]])

                center = (boxes[:, :2] + boxes[:, 2:]) / 2  # [N,2]
                roi2 = roi.expand(len(center), 4)  # [N,4]
                mask = (center > roi2[:, :2]) & (center < roi2[:, 2:])  # [N,2]
                mask = mask[:, 0] & mask[:, 1]  #[N,]
                if not mask.any():
                    continue

                selected_boxes = boxes.index_select(0,
                                                    mask.nonzero().squeeze(1))

                ious = box_iou(selected_boxes, roi)
                if ious.min() < min_iou:
                    continue

                img = img.crop((x, y, x + w, y + h))
                selected_boxes[:, 0].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 1].add_(-y).clamp_(min=0, max=h)
                selected_boxes[:, 2].add_(-x).clamp_(min=0, max=w)
                selected_boxes[:, 3].add_(-y).clamp_(min=0, max=h)
                return img, selected_boxes, labels[mask]
Example #13
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.tensor([input_size, input_size], dtype=torch.float32) if isinstance(input_size, int) \
            else torch.tensor(input_size, dtype=torch.float32)
        anchor_boxes = self._get_anchor_boxes(input_size)

        if boxes.numel() == 0:
            # 0 is background class
            cls_targets = torch.zeros(anchor_boxes.size(0), dtype=torch.int64)
            loc_targets = torch.zeros_like(anchor_boxes, dtype=torch.float32)
        else:
            boxes = change_box_order(boxes, 'xyxy2xywh')

            ious = box_iou(anchor_boxes, boxes, order='xywh')
            max_ious, max_ids = ious.max(1)
            boxes = boxes[max_ids]

            loc_xy = (boxes[:, :2]-anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
            loc_wh = torch.log(boxes[:, 2:]/anchor_boxes[:, 2:])
            loc_targets = torch.cat([loc_xy, loc_wh], 1)
            cls_targets = labels[max_ids]

            cls_targets[max_ious < 0.5] = 0 # 0 is background class
            ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
            cls_targets[ignore] = -1  # for now just mark ignored to -1

        return loc_targets, cls_targets
Example #14
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size, input_size]) if isinstance(input_size, int) \
            else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')
        # if ((boxes[0][2] * boxes[0][3]).numpy() >32 * 32 / 2) :
        #     # print((boxes[0][2]*boxes[0][3]).numpy(),end='->')
        #     sptj='True'
        # else:
        #     sptj = 'False'
        # print('target locked -> ',sptj)

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.5] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return cls_targets, loc_targets
Example #15
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Then we scale [tx,ty,tw,th] by [10,10,5,5] times to make loc_loss larger.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (input_height, input_width).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].

        Reference:
          https://github.com/tensorflow/models/blob/master/object_detection/box_coders/faster_rcnn_box_coder.py
        '''
        scale_factor = torch.Tensor([10,10,5,5])  # scale [tx,ty,tw,th]
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:,:2]-anchor_boxes[:,:2]) / anchor_boxes[:,2:]
        loc_wh = torch.log(boxes[:,2:]/anchor_boxes[:,2:])
        loc_targets = torch.cat([loc_xy,loc_wh], 1) * scale_factor
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious<0.4] = 0
        ignore = (max_ious>0.4) & (max_ious<0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
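A minimal decode sketch for the scaled targets above (illustrative tensors only): the decoder divides by the same [10,10,5,5] factor before inverting the box coder.

import torch

scale_factor = torch.Tensor([10, 10, 5, 5])
anchors = torch.Tensor([[50., 50., 20., 40.]])      # (x, y, w, h)
loc_pred = torch.Tensor([[1.0, -0.5, 0.0, 0.0]])    # scaled (tx, ty, tw, th)

t = loc_pred / scale_factor
xy = t[:, :2] * anchors[:, 2:] + anchors[:, :2]     # -> [[52., 48.]]
wh = torch.exp(t[:, 2:]) * anchors[:, 2:]           # -> [[20., 40.]]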
Example #16
def statistics_result(pred_boxes, label_boxes, iou_thresh=0.5):
    correct_num = 0
    error_num = 0
    miss_num = 0
    for pbox in pred_boxes:
        is_exist = False
        for lbox in label_boxes:
            if lbox[4] == 0:
                continue
            iou = box_iou(pbox, lbox)
            if iou > iou_thresh:
                is_exist = True
                lbox[4] = 0
                break
        if is_exist:
            correct_num += 1
        else:
            error_num += 1
    miss_num = len(label_boxes) - correct_num
    return correct_num, error_num, miss_num
Example #17
    def encode(self, boxes, labels, input_size):
        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)
        
        anchor_boxes = self.get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')
        boxes = boxes.float()
        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)

        cls_targets = 1 + labels[max_ids]
        cls_targets[max_ious < 0.4] = 0
        cls_targets[(max_ious >= 0.4) & (max_ious < 0.5)] = -1
        return loc_targets, cls_targets
Example #18
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        #print(anchor_boxes.shape) [49104,4]
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        #print(ious.shape) [num_anchors, obj]
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]
        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]
        #print(cls_targets.shape) torch.Size([49104])
        cls_targets[max_ious < 0.5] = 0
        #print(cls_targets)
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1

        return loc_targets, cls_targets
Example #19
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:,:2]-anchor_boxes[:,:2]) / anchor_boxes[:,2:]
        loc_wh = torch.log(boxes[:,2:]/anchor_boxes[:,2:])
        loc_targets = torch.cat([loc_xy,loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious<0.5] = 0
        ignore = (max_ious>0.4) & (max_ious<0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
Example #20
    def encode(self, boxes, labels, input_size):
        """
        Encode target bounding boxes and class labels.
        we obey the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

        :param boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax), sized [#obj, 4].
        :param labels: (tensor) object class labels, sized [#obj,].
        :param input_size: (int/tuple) input size of the original image
        :return:
            loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 4].
            cls_targets: (tensor) encoded class labels, sized [#anchors,].
        """
        input_size = torch.Tensor([input_size, input_size]) if isinstance(
            input_size, int) else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        loc_targets = loc_targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])
        cls_targets = labels[max_ids]

        cls_targets[max_ious < 0.4] = 0
        ignore = (max_ious >= 0.4) & (max_ious < 0.5)  # ignore ious in [0.4, 0.5)
        cls_targets[ignore] = -1
        return loc_targets, cls_targets
Example #21
    def encode(self, boxes, labels, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        NEG = 10
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        
        boxes = boxes[max_ids]

        loc_xy = (boxes[:,:2] - anchor_boxes[:,:2]) / anchor_boxes[:,2:]
        loc_wh = torch.log(boxes[:,2:] / anchor_boxes[:,2:])
        loc_targets = torch.cat([loc_xy,loc_wh], 1)
        
        # here we set the positive-to-negative sampling ratio to 1:3
        cls_targets = 1 + labels[max_ids]  # class = label + 1; everything starts out initialized as positive
#         print(cls_targets)
        
        cls_targets[max_ious < 0.1] = 0
        ignore = (max_ious > 0.05) & (max_ious < 0.1)
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        '''
        cls_targets[max_ious < 0.1] = 0
#         print("cls_targets shape:", cls_targets.shape)
        pos = cls_targets > 0 
        n_pos = pos.data.float().sum().item()
#         print(n_pos)
        n_neg = NEG * n_pos if n_pos != 0 else NEG
        n_neg = int(n_neg)
#         print('n_neg',n_neg)
        
#         print(max_ious.shape)
        max_ious = max_ious.numpy().astype(np.float)
        neg_index = np.where(max_ious < 0.1)[0]
#         print("neg_index shape", neg_index.size)
#         print("neg_index", neg_index)
#         neg_index = neg_index.squeeze(1)
#         neg_index = neg_index.numpy().astype(np.int)
#         print("neg_index numpy shape", neg_index.shape)
        
        if neg_index.shape[0] > n_neg:
            disable_index = np.random.choice(
                neg_index, size=(len(neg_index) - n_neg), replace=False)
#             disable_index = disable_index.unsqueeze(1)
#             print("disable_index",disable_index.shape)
            disabel_index = torch.from_numpy(disable_index).float()
            cls_targets[disable_index] = -1
#         print("cls_targets",cls_targets)
#         pos_neg = cls_targets > -1  # exclude ignored anchors
#         print("pos_neg", pos_neg.data.float().sum().item())
# #         ignore = (max_ious > 0.05) & (max_ious<0.01)
# #         cls_targets[ignore] = -1  # for now just mark ignored to -1
        '''
        return loc_targets, cls_targets
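A self-contained sketch of the random negative subsampling that the commented-out block above is aiming at (toy tensors; NEG plays the role of the negative ratio):

import numpy as np
import torch

NEG = 2
cls_targets = torch.tensor([2, 0, 0, 0, 0, 0, 1, 0])    # toy targets: two positives, six negatives
n_pos = int((cls_targets > 0).sum())
n_neg = NEG * n_pos if n_pos > 0 else NEG                # keep at most NEG negatives per positive

neg_index = np.where(cls_targets.numpy() == 0)[0]
if len(neg_index) > n_neg:
    disable = np.random.choice(neg_index, size=len(neg_index) - n_neg, replace=False)
    cls_targets[torch.from_numpy(disable)] = -1          # surplus negatives are ignored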
Example #22
def target_label_generate(labels, gta, mc):
    """
    generate target label matrix
    """
    RF = mc.receptive_field
    (W, H) = (mc.IMAGE_WIDTH, mc.IMAGE_HEIGHT)
    Anchors = mc.Anchors

    #print(anchor_box)
    target_matrix = np.zeros((W // RF, H // RF, Anchors))
    #load anchor box
    anchor_box = mc.Anchor_box

    #print(anchor_box.shape)
    #only valid anchors can be keep
    bbox_xy = utils.bboxtransform(anchor_box)
    #print(bbox_xy.shape)
    _allowed_border = mc._allowed_border
    inds_inside = np.where((bbox_xy[:, 0] >= -_allowed_border)
                           & (bbox_xy[:, 1] >= -_allowed_border)
                           & (bbox_xy[:, 2] < W + _allowed_border) &  # width
                           (bbox_xy[:, 3] < H + _allowed_border)  # height
                           )[0]
    out_inside = np.where((bbox_xy[:, 0] < -_allowed_border)
                          & (bbox_xy[:, 1] < -_allowed_border)
                          & (bbox_xy[:, 2] >= W + _allowed_border) &  # width
                          (bbox_xy[:, 3] >= H + _allowed_border)  # height
                          )[0]
    valid_anchors = anchor_box[inds_inside]
    #print(valid_anchors.shape)
    anchors = utils.coord2box(valid_anchors)
    groundtruth = utils.coord2box(gta)
    #print(len(anchors), len(groundtruth))
    num_of_anchors = len(anchors)
    num_of_gta = len(groundtruth)
    overlaps_table = np.zeros((num_of_anchors, num_of_gta))
    for i in range(num_of_anchors):
        for j in range(num_of_gta):
            overlaps_table[i, j] = utils.box_iou(anchors[i], groundtruth[j])
    #print(overlaps_table)
    #argmax overlaps for each groundtruth
    gt_argmax_overlaps = overlaps_table.argmax(axis=0)
    argmax_overlaps = overlaps_table.argmax(axis=1)
    #overlaps groundtruth
    gt_max_overlaps = overlaps_table[gt_argmax_overlaps,
                                     np.arange(overlaps_table.shape[1])]
    gt_argmax_overlaps = np.where(overlaps_table == gt_max_overlaps)[0]

    #used this to select postive/ negative/ no care samples
    max_overlaps = overlaps_table[np.arange(len(valid_anchors)),
                                  argmax_overlaps]
    target_labels = pick_samples(max_overlaps, gt_argmax_overlaps, mc)
    #subsampling, default subsampling methods is random sample
    target_labels = subsampling(target_labels, mc)

    #bbox delta label
    target_delta, bbox_in_w, bbox_out_w = target_bbox(out_inside,
                                                      valid_anchors,
                                                      gta[argmax_overlaps, :],
                                                      target_labels, mc)
    #UNMAP TO original feature images
    num_anchor_box_per_grid = mc.Anchors
    total_anchors = num_anchor_box_per_grid * (H // RF) * (W // RF)
    labels = unmap2original(target_labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = unmap2original(target_delta,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)
    bbox_inside_weights = unmap2original(bbox_in_w,
                                         total_anchors,
                                         inds_inside,
                                         fill=0)
    bbox_outside_weights = unmap2original(bbox_out_w,
                                          total_anchors,
                                          inds_inside,
                                          fill=0)

    labels = labels.reshape(
        (mc.IMAGE_HEIGHT // RF, mc.IMAGE_WIDTH // RF, mc.Anchors))
    rpn_labels = labels
    #print(rpn_labels.shape)

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((mc.IMAGE_HEIGHT//RF , mc.IMAGE_WIDTH//RF , mc.Anchors * 4))

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((mc.IMAGE_HEIGHT//RF , mc.IMAGE_WIDTH//RF , mc.Anchors * 4))
    #assert bbox_inside_weights.shape[2] == height
    #assert bbox_inside_weights.shape[3] == width

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((mc.IMAGE_HEIGHT//RF , mc.IMAGE_WIDTH//RF , mc.Anchors * 4))

    rpn_bbox_outside_weights = bbox_outside_weights
    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
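A hedged, self-contained sketch of the "unmap to original" idea used above: values computed only for the inside anchors are scattered back into a full-length array (unmap here is illustrative, not the original helper):

import numpy as np

def unmap(data, count, inds, fill=0):
    out = np.full((count,) + data.shape[1:], fill, dtype=data.dtype)
    out[inds] = data
    return out

labels_inside = np.array([1, 0, -1])
full = unmap(labels_inside, count=6, inds=np.array([0, 2, 5]), fill=-1)
# full -> array([ 1, -1,  0, -1, -1, -1])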
Example #23
    def encode(self, boxes, center_points, labels, colls_with, dimensions,
               bins, sines, coses, input_size):
        '''Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          colls_with: (tensor) whether the vehicle collides with the player agent, sized [#obj] (binary)
          dimensions: (tensor), sized [#obj, 3]
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        bins = bins.squeeze(1)
        sines = sines.squeeze(1)
        coses = coses.squeeze(1)
        input_size = torch.Tensor([input_size,input_size]) if isinstance(input_size, int) \
                     else torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        try:
            boxes = change_box_order(boxes, 'xyxy2xywh')
        except:
            assert (0)
            print(
                "a vehicle-free frame, which should be eliminated in a clean dataset"
            )
            boxes = torch.Tensor([[0., 0., 0., 0.]])
            colls_with = torch.Tensor([0.])
            dimensions = torch.Tensor([[0., 0., 0.]])
            sines = torch.Tensor([0.])
            coses = torch.Tensor([0.])
            bins = torch.Tensor([0.])
            labels = torch.Tensor([0.])
            # orientations = torch.Tensor([0.])

        colls_with = torch.Tensor(colls_with)
        colls_with = colls_with
        dimensions = dimensions
        # orientations = orientations.float()

        ious = box_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)

        # select matching instance
        boxes = boxes[max_ids]
        center_points = center_points[max_ids]
        colls_with = colls_with[max_ids]
        dimensions = dimensions[max_ids]
        cls_targets = labels[max_ids]
        bins = bins[max_ids]
        sines = sines[max_ids]
        coses = coses[max_ids]
        # orientations = orientations[max_ids]

        # build offset referring to target anchors
        # print(boxes[0,0], "before more")
        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        # print(loc_xy[0, 0], loc_wh[0,0], "before")
        loc_targets = torch.cat([loc_xy, loc_wh], 1)  # sized [num_anchor, 4]
        center_xy = (center_points[:, :2] -
                     anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        center_depth = center_points[:, 2].unsqueeze(1)
        center_targets = torch.cat([center_xy, center_depth],
                                   1)  # sized [num_anchor, 3]

        # filter invalid or negative instance
        sines[max_ious < 0.5] = 0
        coses[max_ious < 0.5] = 0
        bins[max_ious < 0.5] = 0
        cls_targets[max_ious < 0.5] = 0
        colls_with[max_ious < 0.5] = 0
        dimensions[max_ious < 0.5] = 0
        # orientations[max_ious<0.5] = 0

        # ignore some not enough overlapped instances
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4,0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        colls_with[ignore] = -1
        dimensions[ignore] = -1
        bins[ignore] = -1

        # colls_with[ignore] = -1
        # print(loc_targets[0, 0], "in encoder")
        return loc_targets, cls_targets, center_targets, colls_with, dimensions, bins, sines, coses
Example #24
    def encode(self, boxes, labels, input_size):
        '''
        Encode target bounding boxes and class labels.

        Implement the Faster RCNN box coder in 3D image:
          tz = (z - anchor_z) / anchor_d
          ty = (y - anchor_y) / anchor_h
          tx = (x - anchor_x) / anchor_w
          td = log(d / anchor_d)
          th = log(h / anchor_h)
          tw = log(w / anchor_w)

        Args:
          boxes: (tensor) bounding boxes of (zmin, ymin, xmin, zmax, ymax, xmax), sized [#obj, 6]
          labels: (tensor) object class labels, sized [#obj,]
          input_size: (int/tuple) model input size of (d, h, w)

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors, 6]
          cls_targets: (tensor) encoded class labels, sized [#anchors,]
        '''
        if isinstance(input_size, int):
            input_size = torch.Tensor([input_size, input_size, input_size])
        else:
            input_size = torch.Tensor(input_size)
        anchor_boxes = self.get_anchor_boxes(input_size)  # (z, y, x, d, h, w)
        boxes = change_box_order(boxes, 'zyxzyx2zyxdhw')
        #print(boxes.size())
        ious = box_iou(anchor_boxes, boxes,
                       order="zyxdhw")  # num_anchors x objects
        max_ious, max_ids = ious.max(1)  # best object for each anchor: IoU value and object index
        best_ious, best_ids = ious.max(0)  # best anchor for each object: IoU value and anchor index
        boxes = boxes[max_ids]
        #print(boxes.size())

        loc_zyx = (boxes[:, :3] - anchor_boxes[:, :3]) / anchor_boxes[:, 3:]
        loc_dhw = torch.log(boxes[:, 3:] / anchor_boxes[:, 3:])
        loc_targets = torch.cat([loc_zyx, loc_dhw], 1)

        cls_targets = 1 + labels[max_ids]  # the background class = 0, so +1 for object classes
        #print(cls_targets.size())
        cls_targets[max_ious < 0.4] = 0

        for i in range(best_ids.size()[0]):
            cls_targets[best_ids[i]] = 1 + labels[i]

        ig_num = cls_targets.size()[0] - 100
        cls_targets_array = cls_targets.numpy()
        neg_idx = np.where(cls_targets_array == 0)
        if ig_num > len(neg_idx[0]):
            ig_num -= (ig_num - len(neg_idx[0]))
        ig_idx = np.random.choice(neg_idx[0], ig_num, replace=False)
        cls_targets_array[ig_idx] = -1
        cls_targets = torch.from_numpy(cls_targets_array)
        '''
        ignore = (max_ious > 0.15) & (max_ious < 0.4)
        cls_targets[ignore] = -1
         
        for i in range(best_ids.size()[0]):
            cls_targets[best_ids[i]] = 1 + labels[i]	
        '''
        return loc_targets, cls_targets
Example #25
def mAP(true_cls,
        true_loc,
        pred_cls,
        pred_loc,
        iou_thre=0.5,
        num_class=2,
        ap_func=compute_ap):
    '''
    Compute mAP, following
        https://github.com/fizyr/keras-retinanet/blob/master/keras_retinanet/
        utils/eval.py
    args:
        true_cls, list of tensors, each sized (#obj_i,) with values 0, 1, ...,
            the ground-truth class of every object in each image;
        true_loc, list of tensors, each sized (#obj, 4), mode=xyxy,
            the ground-truth bounding box coordinates of each image;
        pred_cls, list of tensors, each sized (#anchor_remain, #class),
            the predicted scores of the boxes kept for each image, where
            anchor_remain counts the boxes left after score thresholding,
            NMS, etc.;
        pred_loc, list of tensors, each sized (#anchor_remain, 4), mode=xyxy,
            the predicted box locations; the length of the list is the number
            of images;
        iou_thre, IoU threshold, default 0.5, used to match predictions to
            ground-truth boxes;
        num_class, number of classes;
        ap_func, function that computes AP from precision and recall; the
            default, compute_ap, gives somewhat better results than
            sklearn.metrics.auc;
    returns:
        APs, the AP of each class;
        mAP, a float scalar, the mean AP over all classes.
    '''
    # move the ground-truth objects onto the device of the predictions
    device = pred_cls[0].device
    true_cls = [tc.to(device) for tc in true_cls]
    true_loc = [tl.to(device) for tl in true_loc]
    # store the AP of each class
    aps = []
    num_imgs = len(true_cls)
    # the predictions hold per-class scores, so take the max to get the predicted class and its score
    pred_score = []
    pred_class = []
    for t in pred_cls:
        # handle an empty pred_cls (no predicted boxes; calling max directly would raise an error)
        if len(t) == 0:
            pred_score.append(t.new_empty(0))
            pred_class.append(torch.zeros(0, dtype=torch.long,
                                          device=t.device))
        else:
            t_s, t_c = t.max(dim=1)
            pred_score.append(t_s)
            pred_class.append(t_c)
    # compute the AP of each class separately
    for c in range(num_class):
        # ndarray recording whether each prediction is a true positive
        tp = np.zeros((0, ))
        # total number of ground-truth objects of this class, used to compute recall
        num_true_objs = 0.0
        # all scores, in their re-sorted order, used later to compute recall and precision
        all_scores_c_order = []
        for i in range(num_imgs):
            # ground-truth boxes of this class in the current image
            true_c_mask = true_cls[i] == c
            num_true_objs += true_c_mask.sum()  # accumulate the number of ground truths for recall
            true_loc_i_c = true_loc[i][true_c_mask]
            # predicted boxes of this class in the current image
            pred_c_mask = pred_class[i] == c
            # Sort all predicted boxes of this image by score so that the
            #   highest-scoring prediction always gets matched to a gt box
            #   first; the scores, classes and locations are all reordered.
            pred_score_i_c, i_c_order = pred_score[i][pred_c_mask].sort(
                dim=0, descending=True)
            pred_loc_i_c = pred_loc[i][pred_c_mask][i_c_order]
            all_scores_c_order.append(pred_score_i_c)
            # indices of the gt boxes already matched; reset for each image
            detected_true_boxes = []
            # If this class has no predicted boxes, the loop below never runs
            #   and fp/tp remain arrays of length 0; if there are N predictions,
            #   fp and tp grow to length N.
            for d in pred_loc_i_c:
                # If the image has no gt box of this class, every prediction of
                #   this class is counted as a false positive.
                if true_loc_i_c.size(0) == 0:
                    tp = np.append(tp, 0)
                    continue
                # compute the IoU between the prediction and all gt boxes and take the largest as its match
                ious = box_iou(d.unsqueeze(0), true_loc_i_c).squeeze(0)
                max_iou, max_idx = ious.max(dim=0)
                # If the largest IoU is above the threshold, the prediction is
                #   taken to target that gt box and is marked as a true positive.
                #   (Strictly, being a true positive also requires the score to
                #   exceed a threshold, but since we sweep that threshold to build
                #   the recall/precision curve, every match is marked 1 here; when
                #   the threshold moves, predictions whose score falls below it
                #   simply become fp instead of tp, and unmatched ones stay 0.)
                # Also record the gt boxes already matched in this image so they
                #   are not matched again. (A small caveat: if a lower-scoring box
                #   claims a gt first, a better higher-scoring prediction can end
                #   up counted as a false positive, which may lower the AP.)
                if max_iou >= iou_thre and max_idx not in detected_true_boxes:
                    tp = np.append(tp, 1)
                    detected_true_boxes.append(max_idx)
                else:
                    tp = np.append(tp, 0)

        # if no image contains any gt of this class, its AP is taken to be 0
        if num_true_objs == 0.0:
            aps.append(0.)
            continue
        # re-sort everything by score and compute recall/precision for this class
        _, order = torch.cat(all_scores_c_order, dim=0).sort(dim=0,
                                                             descending=True)
        order = order.cpu().numpy()
        tp = tp[order]
        fp = 1 - tp
        # Cumulatively count fp and tp over the first n elements; this amounts to
        #   sliding the score threshold between elements so that everything below
        #   it is predicted negative, and positives (tp or fp) only need to be
        #   counted over the prefix.
        fp = fp.cumsum()
        tp = tp.cumsum()
        # compute recall and precision
        recall = tp / num_true_objs.item()
        # There may be no positives at all (e.g. with a very high threshold);
        #   tp is then 0 as well, but a zero denominator breaks the division, so
        #   add an eps.
        precision = tp / np.maximum((tp + fp), np.finfo(np.float64).eps)
        aps.append(ap_func(recall, precision))
    return aps, np.mean(aps)
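A hedged sketch of a compute_ap-style helper, in the spirit of the all-point interpolated AP used by the keras-retinanet eval code referenced in the docstring:

import numpy as np

def compute_ap(recall, precision):
    # append sentinel values at both ends
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))
    # make precision monotonically decreasing (from right to left)
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # sum the area of the steps where recall changes
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])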
Example #26
def boxes_to_y(true_boxes, anchors, num_classes, image_wh, num_anchors=3):
    """
    transfer true boxes to yolo y format.
    Arguments:
        true_boxes: bbox absolute value in image_wh of one image,
                    value as (xmin, ymin, xmax, ymax, class), shape(?, 5).
        anchors: anchor box size array, shape(num_anchors, 2).
        num_classes: total class num.
        image_wh: true input image size of (w, h).
        
    Returns:
        y_true: list of yolo feature map format,
                shape(grid w, grid h, num_anchors, 5+num_classes),
                box xywh info normalized to (0, 1).
    """
    num_layers = anchors.shape[0] // num_anchors
    box_class = true_boxes[:, 4].astype(np.int32)
    xymin, xymax = true_boxes[:, 0:2], true_boxes[:, 2:4]

    input_size = np.array([416, 416])

    # calculate box center xy and wh, range(0, 416).
    boxes_wh = xymax - xymin
    boxes_xy = xymin + boxes_wh // 2

    # normalize to range(0, 1)
    boxes_xy = boxes_xy / image_wh

    # grid shape
    # e.g. [input_size//32, input_size//16, input_size//8] -> [[13, 13], [26, 26], [52, 52]]
    grid_wh = [input_size // (2**(5 - i)) for i in range(num_layers)]
    grid_boxes_xy = [boxes_xy * grid_wh[i] for i in range(num_layers)
                     ]  # to grid scale, range(0, grid_wh).
    grid_index = [np.floor(grid_boxes_xy[i]) for i in range(num_layers)]
    # boxes_xy = [(boxes_xy[i] - grid_index[i]) for i in range(num_layers)]  # size respect to one grid, range(0, 1).

    # true size of xy min max coordinates relative to grid left top corner.
    anchor_xymax = anchors / 2
    anchor_xymin = -anchor_xymax
    box_xymax = boxes_wh / 2
    box_xymin = -box_xymax

    # create y_true.
    y_true = [
        np.zeros(
            (1, grid_wh[i][1], grid_wh[i][0], num_anchors, 5 + num_classes),
            dtype='float32') for i in range(num_layers)
    ]

    # iterate on each box
    num_boxes = true_boxes.shape[0]
    for box_index in range(num_boxes):
        # calculate iou.
        box1 = np.concatenate([box_xymin[box_index],
                               box_xymax[box_index]]).reshape(1, -1)
        box2 = np.concatenate([anchor_xymin, anchor_xymax], axis=-1)
        iou = box_iou(box1, box2)

        # select the best anchor
        anchor_index = np.argmax(iou)
        layer_index = num_layers - 1 - anchor_index // num_anchors
        layer_anchor_index = anchor_index % num_anchors

        box_xy = boxes_xy[box_index]  # shape(2,)
        # box_wh = boxes_wh[box_index]/anchors[anchor_index]  # shape(2,)
        box_wh = boxes_wh[box_index] / image_wh  # shape(2,), range(0, 1)

        #  fill in y_true.
        w = grid_index[layer_index][box_index, 0].astype('int32')
        h = grid_index[layer_index][box_index, 1].astype('int32')
        y_true[layer_index][0, h, w, layer_anchor_index, :2] = box_xy
        y_true[layer_index][0, h, w, layer_anchor_index, 2:4] = box_wh
        y_true[layer_index][0, h, w, layer_anchor_index, 4:5] = 1
        y_true[layer_index][0, h, w, layer_anchor_index,
                            5 + box_class[box_index]] = 1

    return y_true
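A hedged usage sketch for boxes_to_y, assuming the standard nine YOLOv3 anchors and that the box_iou helper used above accepts numpy (xmin,ymin,xmax,ymax) boxes:

import numpy as np

anchors = np.array([[10, 13], [16, 30], [33, 23],
                    [30, 61], [62, 45], [59, 119],
                    [116, 90], [156, 198], [373, 326]], dtype=np.float32)
true_boxes = np.array([[100, 120, 180, 200, 2]], dtype=np.float32)  # xmin,ymin,xmax,ymax,class
y_true = boxes_to_y(true_boxes, anchors, num_classes=20, image_wh=np.array([416, 416]))
# y_true: list of 3 arrays shaped (1, 13, 13, 3, 25), (1, 26, 26, 3, 25), (1, 52, 52, 3, 25)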
Example #27
    def forward(self, preds, loc_targets, cls_targets, box_targets):
        '''
        Args:
          preds: (tensor) model outputs, sized [batch_size,150,fmsize,fmsize].
          loc_targets: (tensor) loc targets, sized [batch_size,5,4,fmsize,fmsize].
          cls_targets: (tensor) conf targets, sized [batch_size,5,20,fmsize,fmsize].
          box_targets: (list) box targets, each sized [#obj,4].

        Returns:
          (tensor) loss = SmoothL1Loss(loc) + SmoothL1Loss(iou) + SmoothL1Loss(cls)
        '''
        batch_size, _, fmsize, _ = preds.size()
        preds = preds.view(batch_size, 5, 4 + 1 + 20, fmsize, fmsize)

        ### loc_loss
        xy = preds[:, :, :2, :, :].sigmoid()  # x->sigmoid(x), y->sigmoid(y)
        wh = preds[:, :, 2:4, :, :].exp()
        loc_preds = torch.cat([xy, wh], 2)  # [N,5,4,13,13]

        pos = cls_targets.max(2)[0].squeeze() > 0  # [N,5,13,13]
        num_pos = pos.data.long().sum()
        mask = pos.unsqueeze(2).expand_as(
            loc_preds)  # [N,5,13,13] -> [N,5,1,13,13] -> [N,5,4,13,13]
        loc_loss = F.smooth_l1_loss(loc_preds[mask],
                                    loc_targets[mask],
                                    size_average=False)

        ### iou_loss
        iou_preds = preds[:, :, 4, :, :].sigmoid()  # [N,5,13,13]
        iou_targets = Variable(torch.zeros(
            iou_preds.size()).cuda())  # [N,5,13,13]
        box_preds = self.decode_loc(preds[:, :, :4, :, :])  # [N,5,4,13,13]
        box_preds = box_preds.permute(0, 1, 3, 4,
                                      2).contiguous().view(batch_size, -1,
                                                           4)  # [N,5*13*13,4]
        for i in range(batch_size):
            box_pred = box_preds[i]  # [5*13*13,4]
            box_target = box_targets[i]  # [#obj, 4]
            iou_target = box_iou(box_pred, box_target)  # [5*13*13, #obj]
            iou_targets[i] = iou_target.max(1)[0].view(5, fmsize,
                                                       fmsize)  # [5,13,13]

        mask = Variable(torch.ones(
            iou_preds.size()).cuda()) * 0.1  # [N,5,13,13]
        mask[pos] = 1
        iou_loss = F.smooth_l1_loss(iou_preds * mask,
                                    iou_targets * mask,
                                    size_average=False)

        ### cls_loss
        cls_preds = preds[:, :, 5:, :, :]  # [N,5,20,13,13]
        cls_preds = cls_preds.permute(0, 1, 3, 4, 2).contiguous().view(
            -1, 20)  # [N,5,20,13,13] -> [N,5,13,13,20] -> [N*5*13*13,20]
        cls_preds = F.softmax(cls_preds)  # [N*5*13*13,20]
        cls_preds = cls_preds.view(batch_size, 5, fmsize, fmsize, 20).permute(
            0, 1, 4, 2, 3)  # [N*5*13*13,20] -> [N,5,20,13,13]
        pos = cls_targets > 0
        cls_loss = F.smooth_l1_loss(cls_preds[pos],
                                    cls_targets[pos],
                                    size_average=False)

        print('%f %f %f' % (loc_loss.data[0] / num_pos, iou_loss.data[0] /
                            num_pos, cls_loss.data[0] / num_pos),
              end=' ')
        return (loc_loss + iou_loss + cls_loss) / num_pos