Exemple #1
0
    def encode(self, boxes, labels):
        '''Turn ground-truth boxes and labels into per-anchor training targets.

        Each anchor is paired with the ground-truth box that has the highest
        IoU with it, and the pair is encoded with the Faster RCNN rules:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        No IoU threshold is applied in this version: every anchor receives
        (label + 1) of its best-overlapping box, so no anchor is marked as
        background (0) or ignored (-1).

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        '''
        # self.anchor_boxes is converted from corner form below, so it is
        # expected to be stored as (xmin,ymin,xmax,ymax).
        anchors_xyxy = self.anchor_boxes

        # Row-wise maximum: for every anchor, the best IoU and the index of
        # the ground-truth box that achieves it. Only the index is used.
        _best_iou, best_gt = box_iou(anchors_xyxy, boxes).max(1)

        matched_xywh = change_box_order(boxes[best_gt], 'xyxy2xywh')
        anchors_xywh = change_box_order(anchors_xyxy, 'xyxy2xywh')

        anchor_xy = anchors_xywh[:, :2]
        anchor_wh = anchors_xywh[:, 2:]
        offset_xy = (matched_xywh[:, :2] - anchor_xy) / anchor_wh
        log_scale_wh = torch.log(matched_xywh[:, 2:] / anchor_wh)

        loc_targets = torch.cat([offset_xy, log_scale_wh], 1)
        # Shift labels by one so that 0 stays free for the background class.
        cls_targets = labels[best_gt] + 1
        return loc_targets, cls_targets
Exemple #2
0
    def encode(self, boxes, labels):
        '''Encode target bounding boxes and class labels.

        SSD coding rules:
          tx = (x - anchor_x) / (variance[0]*anchor_w)
          ty = (y - anchor_y) / (variance[0]*anchor_h)
          tw = log(w / anchor_w) / variance[1]
          th = log(h / anchor_h) / variance[1]

        Matching is done in two passes:
          1. Repeatedly take the globally largest remaining IoU, assign that
             object to that default box, and remove both from further
             consideration, until the largest remaining IoU is below 1e-6.
          2. Every default box still unmatched whose best IoU is >= 0.5 is
             assigned the object it overlaps most.

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
            Rows of unmatched default boxes are encoded against object 0 and
            carry no meaning.
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
            Matched default boxes get (label + 1); unmatched ones get 0.

        Reference:
          https://github.com/chainer/chainercv/blob/master/chainercv/links/model/ssd/multibox_coder.py
        '''
        def argmax(x):
            '''Find the max value index(row & col) of a 2D tensor.'''
            v, i = x.max(0)  # per-column maximum and the row where it occurs
            j = v.max(0)[1].item()  # column holding the overall maximum
            return (i[j], j)

        default_boxes = self.default_boxes_new  # xywh
        default_boxes = change_box_order(default_boxes, 'xywh2xyxy')

        ious = box_iou(default_boxes, boxes)  # [#anchors, #obj]
        # index[a] is the object assigned to default box a, or -1 if none.
        index = torch.full((len(default_boxes),), -1, dtype=torch.long)
        masked_ious = ious.clone()
        while True:
            i, j = argmax(masked_ious)
            if masked_ious[i, j] < 1e-6:
                break
            index[i] = j
            # Zero the used row and column so neither is picked again.
            masked_ious[i, :] = 0
            masked_ious[:, j] = 0

        mask = (index < 0) & (ious.max(1)[0] >= 0.5)
        if mask.any():
            # Index with the boolean mask directly. The former
            # `mask.nonzero().squeeze()` collapsed to a 0-d index when exactly
            # one default box was selected, which made `ious[...]` 1-D and
            # `.max(1)` fail.
            index[mask] = ious[mask].max(1)[1]

        matched = index.clamp(min=0)  # negative index not supported
        boxes = boxes[matched]
        boxes = change_box_order(boxes, 'xyxy2xywh')
        default_boxes = change_box_order(default_boxes, 'xyxy2xywh')

        variances = (0.1, 0.2)
        loc_xy = (boxes[:, :2] -
                  default_boxes[:, :2]) / default_boxes[:, 2:] / variances[0]
        loc_wh = torch.log(boxes[:, 2:] / default_boxes[:, 2:]) / variances[1]
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[matched]
        cls_targets[index < 0] = 0  # unmatched default boxes are background
        return loc_targets, cls_targets
Exemple #3
0
    def encode(self, boxes, labels):
        '''Match ground-truth boxes to default boxes and build SSD targets.

        Matching runs in two passes. First, the largest remaining IoU is
        taken repeatedly, pairing that default box with that object and
        zeroing both its row and column, until the largest remaining IoU is
        below 1e-6. Second, every default box that is still unmatched but
        whose best IoU is >= 0.5 is paired with the object it overlaps most.

        Offsets are scaled by the variances (0.1, 0.2):
          tx = (x - anchor_x) / anchor_w / 0.1
          ty = (y - anchor_y) / anchor_h / 0.1
          tw = log(w / anchor_w) / 0.2
          th = log(h / anchor_h) / 0.2

        Args:
          boxes: (tensor) boxes converted below from (xmin,ymin,xmax,ymax);
            indexed as [#obj, 4].
          labels: (tensor) class labels, indexed per object.

        Returns:
          loc_targets: (tensor) encoded offsets, one row of 4 per default box.
            Rows where cls_targets is 0 are encoded against object 0 and are
            not meaningful.
          cls_targets: (tensor) (label + 1) for matched default boxes (the
            positive samples), 0 for all others.
        '''
        def best_pair(iou_matrix):
            # Locate the largest entry: its default-box row and object column.
            col_max, col_argmax = iou_matrix.max(0)
            obj = col_max.max(0)[1].item()
            return (col_argmax[obj], obj)

        # self.default_boxes is treated as (cx, cy, w, h); convert to corners
        # for the IoU computation.
        anchors_xyxy = change_box_order(self.default_boxes, 'xywh2xyxy')
        ious = box_iou(anchors_xyxy, boxes)  # [#anchors, #obj]

        # assigned[a] holds the object index matched to default box a, or -1.
        assigned = torch.LongTensor(len(anchors_xyxy)).fill_(-1)

        # Pass 1: one-to-one matching on a scratch copy of the IoU matrix.
        remaining = ious.clone()
        anchor, obj = best_pair(remaining)
        while not (remaining[anchor, obj] < 1e-6):
            assigned[anchor] = obj
            # Zeroed entries mark this default box and object as consumed;
            # this is what eventually trips the loop condition.
            remaining[anchor, :] = 0
            remaining[:, obj] = 0
            anchor, obj = best_pair(remaining)

        # Pass 2: default boxes missed in pass 1 that still overlap some
        # object with IoU >= 0.5 take their best-overlapping object.
        leftover = (assigned < 0) & (ious.max(1)[0] >= 0.5)
        if leftover.any():
            assigned[leftover] = ious[leftover].max(1)[1]

        gather_ids = assigned.clamp(min=0)  # negative index not supported
        gt_xywh = change_box_order(boxes[gather_ids], 'xyxy2xywh')
        anchors_xywh = change_box_order(anchors_xyxy, 'xyxy2xywh')

        variances = (0.1, 0.2)
        anchor_wh = anchors_xywh[:, 2:]
        offset_xy = (gt_xywh[:, :2] - anchors_xywh[:, :2]) / anchor_wh / variances[0]
        offset_wh = torch.log(gt_xywh[:, 2:] / anchor_wh) / variances[1]
        loc_targets = torch.cat([offset_xy, offset_wh], 1)

        cls_targets = 1 + labels[gather_ids]
        cls_targets[assigned < 0] = 0
        return loc_targets, cls_targets
Exemple #4
0
    def encode_(self, image, boxes, labels):
        '''Encode ground-truth boxes and labels into per-anchor targets.

        Each anchor is paired with the ground-truth box of highest IoU and
        encoded with variance-scaled offsets (variances = (0.1, 0.2)):
          tx = (x - anchor_x) / anchor_w / 0.1
          ty = (y - anchor_y) / anchor_h / 0.1
          tw = log(w / anchor_w) / 0.2
          th = log(h / anchor_h) / 0.2

        Args:
          image: not used by this method.
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,]:
            (label + 1) where the best IoU is >= 0.5, -1 (ignored) where it
            lies strictly between 0.4 and 0.5, and 0 otherwise.
        '''
        # self.default_boxes is treated as (cx, cy, w, h); corners are needed
        # for the IoU computation.
        anchors_xyxy = change_box_order(self.default_boxes, 'xywh2xyxy')

        # Best IoU per anchor and the ground-truth index that achieves it.
        best_iou, best_gt = box_iou(anchors_xyxy, boxes).max(1)

        gt_xywh = change_box_order(boxes[best_gt], 'xyxy2xywh')
        anchors_xywh = change_box_order(anchors_xyxy, 'xyxy2xywh')

        variances = (0.1, 0.2)
        anchor_wh = anchors_xywh[:, 2:]
        offset_xy = (gt_xywh[:, :2] - anchors_xywh[:, :2]) / anchor_wh / variances[0]
        offset_wh = torch.log(gt_xywh[:, 2:] / anchor_wh) / variances[1]
        loc_targets = torch.cat([offset_xy, offset_wh], 1)

        cls_targets = 1 + labels[best_gt]
        # Everything below 0.5 first becomes background ...
        cls_targets[best_iou < 0.5] = 0
        # ... then the (0.4, 0.5) band is re-marked as ignored.
        in_ignore_band = (best_iou > 0.4) & (best_iou < 0.5)
        cls_targets[in_ignore_band] = -1
        return loc_targets, cls_targets
    def encode(self, boxes, labels, input_size):
        """Build per-anchor regression and classification targets.

        Each anchor is paired with the ground-truth box of highest IoU and
        encoded with the Faster RCNN box coder:
            tx = (x - anchor_x) / anchor_w
            ty = (y - anchor_y) / anchor_h
            tw = log(w / anchor_w)
            th = log(h / anchor_h)

        Args:
            boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax); reshaped to [#obj, 4].
            labels: (tensor) object class labels, sized [#obj, ].
            input_size: (int/tuple) model input size of (w, h), should be the same.
        Returns:
            loc_trues: (tensor) encoded bounding boxes, sized [#anchors, 4].
            cls_trues: (tensor, float32) per-anchor class targets, sized [#anchors, ]:
                the gathered label where the best IoU is >= 0.5, -1 where it lies
                strictly between 0.4 and 0.5, and 0 otherwise.
        """
        input_size = _make_list_input_size(input_size)
        gt_xyxy = tf.reshape(boxes, [-1, 4])
        anchors = self._get_anchor_boxes(input_size)

        # Convert to (cx, cy, w, h) and multiply by (w, h, w, h).
        # NOTE(review): this presumably maps normalised boxes to pixel
        # coordinates -- confirm against the caller.
        gt_xywh = change_box_order(gt_xyxy, 'xyxy2xywh')
        gt_xywh = gt_xywh * tf.tile(input_size, [2])

        iou_matrix = box_iou(anchors, gt_xywh, order='xywh')
        best_gt = tf.argmax(iou_matrix, axis=1)
        best_iou = tf.reduce_max(iou_matrix, axis=1)

        # One ground-truth row per anchor, [#anchors, 4].
        matched = tf.gather(gt_xywh, best_gt)

        anchor_xy = anchors[:, :2]
        anchor_wh = anchors[:, 2:]
        offset_xy = (matched[:, :2] - anchor_xy) / anchor_wh
        offset_wh = tf.log(matched[:, 2:] / anchor_wh)
        loc_trues = tf.concat([offset_xy, offset_wh], 1)

        # TODO: check if needs add 1 here -- labels are used without an
        # offset, while 0 is also the value written for background anchors.
        cls_trues = tf.gather(labels, best_gt)

        is_background = best_iou < 0.5
        cls_trues = tf.where(is_background, tf.zeros_like(cls_trues), cls_trues)

        # IoUs strictly between 0.4 and 0.5 are marked -1 (ignored).
        is_ignored = tf.logical_and(best_iou > 0.4, best_iou < 0.5)
        cls_trues = tf.where(is_ignored, tf.ones_like(cls_trues) * -1, cls_trues)

        return loc_trues, tf.cast(cls_trues, tf.float32)
Exemple #6
0
    def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
        '''Decode predicted loc/cls back to real box locations and class labels.

        The offsets are applied to self.anchor_boxes (inverse of the Faster
        RCNN box coder); then, for each non-background class, boxes scoring
        above score_thresh are kept and filtered with box_nms.

        Args:
          loc_preds: (tensor) predicted loc, sized [#anchors,4].
          cls_preds: (tensor) predicted conf, sized [#anchors,#classes].
            Column 0 is skipped; column (i+1) is read as the score of class i.
          score_thresh: (float) threshold for object confidence score.
          nms_thresh: (float) threshold for box nms.

        Returns:
          boxes: (tensor) bbox locations as (xmin,ymin,xmax,ymax), sized [#obj,4].
          labels: (tensor) class labels, sized [#obj,].
          scores: (tensor) confidence scores of the kept boxes, sized [#obj,].
          All three are empty (first dimension 0) when no score exceeds
          score_thresh.
        '''
        anchor_boxes = change_box_order(self.anchor_boxes, 'xyxy2xywh')
        xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
        # Centre/size back to corner form.
        box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

        boxes = []
        labels = []
        scores = []
        num_classes = cls_preds.size(1)
        for i in range(num_classes - 1):
            score = cls_preds[:, i + 1]  # class i corresponds to (i+1) column
            mask = score > score_thresh
            if not mask.any():
                continue
            box = box_preds[mask]
            score = score[mask]

            keep = box_nms(box, score, nms_thresh)
            boxes.append(box[keep])
            labels.append(torch.full_like(keep, i))
            scores.append(score[keep])

        if not boxes:
            # torch.cat raises on an empty list, so return well-formed empty
            # results instead of crashing when nothing passes the threshold.
            return (box_preds.new_zeros((0, 4)),
                    box_preds.new_zeros((0,), dtype=torch.long),
                    cls_preds.new_zeros((0,)))

        boxes = torch.cat(boxes, 0)
        labels = torch.cat(labels, 0)
        scores = torch.cat(scores, 0)
        return boxes, labels, scores
Exemple #7
0
    def encode(self,
               boxes,
               labels,
               input_size,
               pos_iou_threshold=0.5,
               neg_iou_threshold=0.4):
        """Build per-anchor regression and classification targets.

        Each anchor is paired with the ground-truth box of highest IoU and
        encoded with the Faster RCNN box coder:
            tx = (x - anchor_x) / anchor_w
            ty = (y - anchor_y) / anchor_h
            tw = log(w / anchor_w)
            th = log(h / anchor_h)

        Args:
            boxes: (tensor) bounding boxes of (xmin, ymin, xmax, ymax); reshaped to [#obj, 4].
            labels: (tensor) object class labels, sized [#obj, ].
            input_size: (int/tuple) model input size of (w, h), should be the same.
            pos_iou_threshold: (float) anchors whose best IoU is below this
                value do not keep their gathered label.
            neg_iou_threshold: (float) anchors whose best IoU lies strictly
                between this value and pos_iou_threshold are marked -1.
        Returns:
            loc_trues: (tensor) encoded bounding boxes, sized [#anchors, 4].
                When conf.use_secondbig_loss_constrain is set, the encoding
                against the second-best box is appended, giving [#anchors, 8].
            cls_trues: (tensor, float32) per-anchor class targets, sized [#anchors, ]:
                the gathered label, 0 below pos_iou_threshold, -1 in the
                ignore band.
        """

        input_size = _make_list_input_size(input_size)
        boxes = tf.reshape(boxes, [-1, 4])
        anchor_boxes = self._get_anchor_boxes(input_size)

        # Convert to (cx, cy, w, h), then multiply by (w, h, w, h).
        # NOTE(review): the original author questioned whether these two
        # steps should be swapped; the order is kept as it was.
        gt_xywh = change_box_order(boxes, 'xyxy2xywh')
        gt_xywh = gt_xywh * tf.tile(input_size, [2])

        def _encode_against(gt_ids):
            # Encode, for every anchor, the ground-truth box selected by gt_ids.
            picked = tf.gather(gt_xywh, gt_ids)  # [#anchors, 4]
            d_xy = (picked[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
            d_wh = tf.log(picked[:, 2:] / anchor_boxes[:, 2:])
            return tf.concat([d_xy, d_wh], 1)

        iou_matrix = box_iou(anchor_boxes, gt_xywh,
                             order='xywh')  # [#anchors, #obj]
        best_ids = tf.argmax(iou_matrix, axis=1)  # [#anchors,]
        best_ious = tf.reduce_max(iou_matrix, axis=1)  # [#anchors,]

        loc_trues = _encode_against(best_ids)  # [#anchors, 4]

        # TODO: check if needs add 1 here -- labels are used without an
        # offset, while 0 is also the value written for background anchors.
        cls_trues = tf.gather(labels, best_ids)

        below_pos = best_ious < pos_iou_threshold
        cls_trues = tf.where(below_pos, tf.zeros_like(cls_trues), cls_trues)

        # Strictly between the two thresholds: marked -1 (ignored).
        in_ignore_band = tf.logical_and(best_ious > neg_iou_threshold,
                                        best_ious < pos_iou_threshold)
        cls_trues = tf.where(in_ignore_band,
                             tf.ones_like(cls_trues) * -1, cls_trues)
        cls_trues = tf.cast(cls_trues, tf.float32)

        # Optional: also encode each anchor against its second-best box.
        if conf.use_secondbig_loss_constrain:
            # Subtract 1 at the best match so argmax selects another column
            # when one is available.
            best_one_hot = tf.one_hot(
                best_ids, tf.shape(iou_matrix, out_type=tf.int32)[1])
            iou_without_best = iou_matrix - best_one_hot
            second_ids = tf.argmax(iou_without_best, axis=1)  # [#anchors,]
            second_loc_trues = _encode_against(second_ids)
            loc_trues = tf.concat([loc_trues, second_loc_trues], 1)

        return loc_trues, cls_trues