Example #1
    def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
        variances = (0.1, 0.2)
        xy = loc_preds[:, :2] * variances[0] * self.default_boxes[:, 2:] + self.default_boxes[:, :2]
        wh = torch.exp(loc_preds[:, 2:] * variances[1]) * self.default_boxes[:, 2:]
        box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

        boxes = []
        labels = []
        scores = []
        num_classes = cls_preds.size(1)
        for i in range(num_classes - 1):
            score = cls_preds[:, i + 1]  # class i corresponds to column i+1 (column 0 is background)
            mask = score > score_thresh
            if not mask.any():
                continue
            box = box_preds[mask.nonzero().squeeze(1)]
            score = score[mask]

            keep = box_nms(box, score, nms_thresh)
            boxes.append(box[keep])
            labels.append(torch.full((len(box[keep]),), i, dtype=torch.long))
            scores.append(score[keep])

        boxes = torch.cat(boxes, 0)
        labels = torch.cat(labels, 0)
        scores = torch.cat(scores, 0)
        return boxes, labels, scores
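
All of the PyTorch examples above call a `box_nms` helper that is not shown. A minimal sketch of what such a helper could look like (greedy hard NMS over `[xmin, ymin, xmax, ymax]` boxes, returning the indices to keep); this is an assumption about its interface, not the original implementation:

import torch

def box_nms(boxes, scores, threshold=0.5):
    """Greedy NMS sketch: boxes are [N, 4] in (xmin, ymin, xmax, ymax) order,
    scores are [N]; returns a LongTensor of indices to keep."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort(descending=True)

    keep = []
    while order.numel() > 0:
        i = order[0].item()
        keep.append(i)
        if order.numel() == 1:
            break
        # Intersection of the chosen box with the remaining boxes.
        xx1 = torch.max(x1[order[1:]], x1[i])
        yy1 = torch.max(y1[order[1:]], y1[i])
        xx2 = torch.min(x2[order[1:]], x2[i])
        yy2 = torch.min(y2[order[1:]], y2[i])
        inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Drop boxes whose IoU with the chosen box exceeds the threshold.
        order = order[1:][iou <= threshold]
    return torch.tensor(keep, dtype=torch.long)

In newer code, torchvision.ops.nms(boxes, scores, iou_threshold) provides the same operation.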
Example #2
    def decode(self, loc_preds, cls_preds, input_size):
        '''Decode outputs back to bounding box locations and class labels.

        Args:
          loc_preds: (tensor) predicted locations, sized [#anchors, 4].
          cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
          input_size: (tuple) model input size of (w,h).

        Returns:
          boxes: (tensor) decoded box locations, sized [#obj,4].
          labels: (tensor) class labels for each box, sized [#obj,].
        '''
        CLS_THRESH = 0.5
        NMS_THRESH = 0.5

        input_size = torch.Tensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)  # xywh

        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]

        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_wh.exp() * anchor_boxes[:, 2:]
        boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors,4]

        score, labels = cls_preds.sigmoid().max(1)  # [#anchors,]
        ids = score > CLS_THRESH
        ids = ids.nonzero().squeeze(1)  # [#obj,]; squeeze(1) keeps a 1-D index even for a single detection
        keep = box_nms(boxes[ids], score[ids], threshold=NMS_THRESH)
        return boxes[ids][keep], labels[ids][keep]
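
None of the examples show `_get_anchor_boxes`. For orientation, here is a self-contained sketch of an FPN/RetinaNet-style anchor generator in the same (cx, cy, w, h) convention the decoder expects; the areas, strides, ratios and scales are illustrative assumptions, not the original configuration:

import math
import torch

def get_anchor_boxes(input_size,
                     anchor_areas=(32**2, 64**2, 128**2, 256**2, 512**2),
                     aspect_ratios=(0.5, 1.0, 2.0),
                     scales=(1.0, 2 ** (1 / 3), 2 ** (2 / 3))):
    """Return every anchor as a row of a [N, 4] tensor in (cx, cy, w, h) order."""
    w_in, h_in = input_size
    boxes = []
    for level, area in enumerate(anchor_areas):
        stride = 2 ** (level + 3)                        # P3..P7 -> strides 8..128
        fm_w, fm_h = math.ceil(w_in / stride), math.ceil(h_in / stride)
        # Anchor centres for every cell of this feature map, in row-major order.
        xs = (torch.arange(fm_w, dtype=torch.float32) + 0.5) * stride
        ys = (torch.arange(fm_h, dtype=torch.float32) + 0.5) * stride
        cx = xs.repeat(fm_h)
        cy = ys.repeat_interleave(fm_w)
        for ratio in aspect_ratios:
            for scale in scales:
                w = math.sqrt(area / ratio) * scale      # w * h == area * scale**2
                h = math.sqrt(area * ratio) * scale
                boxes.append(torch.stack(
                    [cx, cy, torch.full_like(cx, w), torch.full_like(cx, h)], dim=1))
    return torch.cat(boxes, dim=0)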
Example #3
    def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
        '''Decode predicted loc/cls back to real box locations and class labels.
        Args:
          loc_preds: (tensor) predicted loc, sized [8732,4].
          cls_preds: (tensor) predicted conf, sized [8732,21].
          score_thresh: (float) threshold for object confidence score.
          nms_thresh: (float) threshold for box nms.
        Returns:
          boxes: (tensor) bbox locations, sized [#obj,4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        variances = (0.1, 0.2)
        # variances = (1, 1)

        xy = loc_preds[:, :2] * variances[0] * self.default_boxes[:, 2:] + self.default_boxes[:, :2]
        wh = torch.exp(loc_preds[:, 2:] * variances[1]) * self.default_boxes[:, 2:]
        box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

        boxes = []
        labels = []
        scores = []
        num_classes = cls_preds.size(1)
        for i in range(num_classes - 1):
            score = cls_preds[:, i + 1]  # class i corresponds to column i+1 (column 0 is background)
            mask = score > score_thresh
            if not mask.any():
                continue
            box = box_preds[mask]  # boolean indexing keeps the row dimension even for a single match
            score = score[mask]

            keep = box_nms(box, score, nms_thresh)
            boxes.append(box[keep])
            labels.append(torch.full((len(box[keep]),), i, dtype=torch.long))
            scores.append(score[keep])

        if boxes:
            boxes = torch.cat(boxes, 0)
            labels = torch.cat(labels, 0)
            scores = torch.cat(scores, 0)
        else:  # nothing exceeded score_thresh
            boxes = labels = scores = None

        return boxes, labels, scores
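
The decode in Examples #1 and #3 inverts the usual SSD target encoding with variances (0.1, 0.2). For reference, a minimal sketch of that forward encoding step, assuming ground-truth boxes already matched to default boxes and both given in (cx, cy, w, h) order:

import torch

def encode_boxes(boxes_xywh, default_boxes, variances=(0.1, 0.2)):
    """Inverse of the decode above: turn matched (cx, cy, w, h) boxes into
    regression targets relative to (cx, cy, w, h) default boxes."""
    txy = (boxes_xywh[:, :2] - default_boxes[:, :2]) / (default_boxes[:, 2:] * variances[0])
    twh = torch.log(boxes_xywh[:, 2:] / default_boxes[:, 2:]) / variances[1]
    return torch.cat([txy, twh], dim=1)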
Example #4
    def decode(self, loc_preds, cls_preds, score_thresh=0.6, nms_thresh=0.45):
        '''Decode predicted loc/cls back to real box locations and class labels.

        Args:
          loc_preds: (tensor) predicted loc, sized [#anchors,4].
          cls_preds: (tensor) predicted conf, sized [#anchors,#classes].
          score_thresh: (float) threshold for object confidence score.
          nms_thresh: (float) threshold for box nms.

        Returns:
          boxes: (tensor) bbox locations, sized [#obj,4].
          labels: (tensor) class labels, sized [#obj,].
        '''
        anchor_boxes = change_box_order(self.anchor_boxes, 'xyxy2xywh')
        xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = loc_preds[:, 2:].exp() * anchor_boxes[:, 2:]
        box_preds = torch.cat([xy - wh / 2, xy + wh / 2], 1)

        boxes = []
        labels = []
        scores = []
        num_classes = cls_preds.size(1)
        for i in range(num_classes - 1):
            score = cls_preds[:, i + 1]  # class i corresponds to column i+1 (column 0 is background)
            mask = score > score_thresh
            if not mask.any():
                continue
            box = box_preds[mask]
            score = score[mask]

            keep = box_nms(box, score, nms_thresh)
            boxes.append(box[keep])
            labels.append(torch.full_like(keep, i))
            scores.append(score[keep])

        boxes = torch.cat(boxes, 0)
        labels = torch.cat(labels, 0)
        scores = torch.cat(scores, 0)
        return boxes, labels, scores
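
Example #4 calls `change_box_order(self.anchor_boxes, 'xyxy2xywh')`, which is not shown. A minimal sketch of such a conversion helper (and its inverse), assuming the `[N, 4]` box layout used throughout:

import torch

def change_box_order(boxes, order):
    """Convert boxes between (xmin, ymin, xmax, ymax) and (cx, cy, w, h)."""
    assert order in ('xyxy2xywh', 'xywh2xyxy')
    a = boxes[:, :2]
    b = boxes[:, 2:]
    if order == 'xyxy2xywh':
        return torch.cat([(a + b) / 2, b - a], dim=1)  # centre, size
    return torch.cat([a - b / 2, a + b / 2], dim=1)    # corners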
Example #5
    def decode(self,
               loc_preds,
               cls_preds,
               input_size=conf.input_size,
               cls_thred=conf.cls_thred,
               max_output_size=conf.max_output_size,
               nms_thred=conf.nms_thred,
               return_score=False,
               tf_box_order=True):
        """Decode outputs back to bouding box locations and class labels.

        Args:
            loc_preds: (tensor) predicted locations, sized [#anchors, 4].
            cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
            input_size: (int/tuple) model input size (w, h); w and h should be equal.
            cls_thred: class score threshold
            max_output_size: max output nums after nms
            nms_thred: non-maximum suppression threshold
            return_score: (bool) indicate whether to return score value.
            tf_box_order: (bool) True: [ymin, xmin, ymax, xmax]
                                False: [xmin, ymin, xmax, ymax]
        Returns:
            boxes: (tensor) decoded box locations, sized [#obj, 4].
                            order determined by param: tf_box_order
            labels: (tensor) class labels for each box, sized [#obj, ].
            NOTE: #obj == min(#detected_objs, #max_output_size)
        """
        assert len(loc_preds.get_shape().as_list()) == 2, \
            'Ensure the location input shape to be [#anchors, 4]'
        assert len(cls_preds.get_shape().as_list()) == 2, \
            'Ensure the class input shape to be [#anchors, #classes]'

        input_size = _make_list_input_size(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]

        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = tf.exp(loc_wh) * anchor_boxes[:, 2:]
        boxes = tf.concat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors, 4]

        labels = tf.argmax(cls_preds, 1)  # [#anchors, ]
        score = tf.reduce_max(tf.sigmoid(cls_preds), 1)

        ids = tf.cast(score > cls_thred, tf.int32)
        ids = tf.where(tf.not_equal(ids, 0))

        if not ids.numpy().any():  # Fail to detect, choose the max score
            ids = tf.expand_dims(tf.argmax(score), axis=-1)
        else:
            ids = tf.squeeze(ids, -1)
        if tf_box_order:
            # [ymin, xmin, ymax, xmax]
            boxes = tf.transpose(tf.gather(tf.transpose(boxes), [1, 0, 3, 2]))
            keep = tf.image.non_max_suppression(
                tf.gather(boxes, ids),
                tf.gather(score, ids),
                max_output_size=max_output_size,
                iou_threshold=nms_thred)
        else:
            # [xmin, ymin, xmax, ymax]
            keep = box_nms(tf.gather(boxes, ids),
                           tf.gather(score, ids),
                           threshold=nms_thred)

        def _index(t, index):
            """Gather a tensor successively.
            E.g., _index(boxes, [idx_1, idx_2]) == tf.gather(tf.gather(boxes, idx_1), idx_2)
            """
            if not isinstance(index, (tuple, list)):
                index = [index]  # wrap a single index instead of iterating over it
            for i in index:
                t = tf.gather(t, i)
            return t

        if return_score:
            return (_index(boxes, [ids, keep]),
                    _index(labels, [ids, keep]),
                    _index(score, [ids, keep]))
        return _index(boxes, [ids, keep]), _index(labels, [ids, keep])
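
This TensorFlow variant also relies on `_make_list_input_size`, which is not shown; presumably it only normalizes an int or a (w, h) pair into a float (w, h) pair. A sketch under that assumption:

def _make_list_input_size(input_size):
    """Assumed helper: accept an int or a (w, h) pair and return [w, h] as floats."""
    if isinstance(input_size, (int, float)):
        input_size = (input_size, input_size)
    w, h = input_size
    return [float(w), float(h)]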
Example #6
    def decode(self,
               loc_preds,
               cls_preds,
               input_size=conf.input_size,
               output_size=None,
               cls_thred=conf.cls_thred,
               max_output_size=conf.max_output_size,
               nms_thred=conf.nms_thred,
               return_score=True,
               tf_box_order=conf.tf_box_order):
        """Decode outputs back to bouding box locations and class labels.

        We obey the Faster RCNN box coder:
            tx = (x - anchor_x) / anchor_w
            ty = (y - anchor_y) / anchor_h
            tw = log(w / anchor_w)
            th = log(h / anchor_h)
        Args:
            loc_preds: (tensor) predicted locations, sized [#anchors, 4].
            cls_preds: (tensor) predicted class labels, sized [#anchors, #classes].
            input_size: (int/tuple) model input size (w, h); w and h should be equal.
            cls_thred: class score threshold
            max_output_size: max output nums after nms
            nms_thred: non-maximum suppression threshold
            return_score: (bool) indicate whether to return score value.
            tf_box_order: (bool) True: [ymin, xmin, ymax, xmax]
                                False: [xmin, ymin, xmax, ymax]
        Returns:
            boxes: (tensor) decoded box locations, sized [#obj, 4].
                            order determined by param: tf_box_order
            labels: (tensor) class labels for each box, sized [#obj, ].
            NOTE: #obj == min(#detected_objs, #max_output_size)
        """

        input_size = _make_list_input_size(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)

        loc_xy = loc_preds[:, :2]
        loc_wh = loc_preds[:, 2:]

        xy = loc_xy * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = tf.exp(loc_wh) * anchor_boxes[:, 2:]
        boxes = tf.concat([xy - wh / 2, xy + wh / 2], 1)  # [#anchors, 4]

        labels = tf.argmax(cls_preds, 1)  # [#anchors, ]
        score = tf.sigmoid(tf.reduce_max(cls_preds, 1))  # sigmoid is monotonic, so this equals reduce_max(sigmoid(...))

        ids = tf.cast(score > cls_thred, tf.int32)
        ids = tf.where(tf.not_equal(ids, 0))

        if ids.shape[0] == 0:  # failed to detect anything: fall back to the max score
            ids = tf.expand_dims(tf.argmax(score), axis=-1)
            print("Box decode: no score above cls_thred, falling back to the max score")
        else:
            ids = tf.squeeze(ids, -1)
        if tf_box_order:
            # [ymin, xmin, ymax, xmax]
            boxes = tf.transpose(tf.gather(tf.transpose(boxes), [1, 0, 3, 2]))
            keep = tf.image.non_max_suppression(
                tf.gather(boxes, ids),
                tf.gather(score, ids),
                max_output_size=max_output_size,
                iou_threshold=nms_thred)
        else:
            # [xmin, ymin, xmax, ymax]
            keep = box_nms(tf.gather(boxes, ids),
                           tf.gather(score, ids),
                           threshold=nms_thred)

        def _index(t, index):
            """Gather a tensor successively.
            E.g., _index(boxes, [idx_1, idx_2]) == tf.gather(tf.gather(boxes, idx_1), idx_2)
            """
            if not isinstance(index, (tuple, list)):
                index = [index]  # wrap a single index instead of iterating over it
            for i in index:
                t = tf.gather(t, i)
            return t

        bboxes = _index(boxes, [ids, keep])
        if tf_box_order:
            bbox = tf.split(axis=1, num_or_size_splits=4, value=bboxes)
            bboxes = tf.concat([bbox[1], bbox[0], bbox[3], bbox[2]], axis=1)

        if return_score:
            return bboxes, _index(labels, [ids, keep]), _index(score, [ids, keep])
        return bboxes, _index(labels, [ids, keep])
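
The docstring above spells out the Faster R-CNN box coder that this decode inverts. As a cross-check, the forward (encoding) direction would look roughly like this, with boxes and anchors both in (cx, cy, w, h) order (a sketch, not part of the original code):

import tensorflow as tf

def encode_boxes(boxes_xywh, anchors_xywh):
    """Faster R-CNN style encoding, the inverse of the decode above:
    tx = (x - anchor_x) / anchor_w, ty = (y - anchor_y) / anchor_h,
    tw = log(w / anchor_w),         th = log(h / anchor_h)."""
    txy = (boxes_xywh[:, :2] - anchors_xywh[:, :2]) / anchors_xywh[:, 2:]
    twh = tf.math.log(boxes_xywh[:, 2:] / anchors_xywh[:, 2:])
    return tf.concat([txy, twh], axis=1)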
Example #7
    def decode___(self,
                  loc_preds,
                  cls_preds,
                  score_thresh=0.6,
                  nms_thresh=0.45):
        '''Decode predicted loc/cls back to real box locations and class labels.
        Args:
          loc_preds: (list of tensors) predicted loc, one tensor per feature map level.
          cls_preds: (list of tensors) predicted conf, one tensor per feature map level.
          score_thresh: (float) threshold for object confidence score.
          nms_thresh: (float) threshold for box nms.
        Returns:
          boxes: (tensor) bbox locations, sized [#obj,4].
          labels: (tensor) class labels, sized [#obj,].
        '''

        self.steps = (4, 8, 16, 32, 64, 128, 256, 512)
        self.box_sizes = (17.92, 35.84, 76.8, 153.6, 230.4, 307.2, 384.0,
                          460.8, 537.6)
        self.aspect_ratios = ((), (2, ), (2, ), (2, ), (2, ), (2, ), (2, ),
                              (2, ))

        boxes = []
        score = []
        for i in range(len(cls_preds)):
            cls_preds[i] = torch.sigmoid(cls_preds[i].squeeze())
        for i in range(len(loc_preds)):
            oreg, ocls = loc_preds[i].squeeze().data.cpu(), cls_preds[i].data.cpu()
            FH, FW, anchor_num = ocls.size()  # feature map size
            for Findex in range(FH * FW):
                windex, hindex = Findex % FW, Findex // FW
                cx = (windex + 0.5) * self.steps[i]
                cy = (hindex + 0.5) * self.steps[i]

                if ocls[hindex, windex, 0] > score_thresh:
                    s = self.box_sizes[i]
                    loc = oreg[hindex, windex, 0, :].unsqueeze(0)
                    prior = torch.Tensor([cx, cy, s, s]).unsqueeze(0)

                    variances = (1, 1)

                    xy = loc[:, :2] * variances[0] * prior[:, 2:] + prior[:, :2]
                    wh = torch.exp(loc[:, 2:] * variances[1]) * prior[:, 2:]
                    score.append(ocls[hindex, windex, 0])

                if ocls[hindex, windex, 1] > score_thresh:
                    s = math.sqrt(self.box_sizes[i] * self.box_sizes[i + 1])
                    loc = oreg[hindex, windex, 1, :].unsqueeze(0)
                    prior = torch.Tensor([cx, cy, s, s]).unsqueeze(0)

                    variances = (1, 1)

                    xy = loc[:, :2] * variances[0] * prior[:, 2:] + prior[:, :2]
                    wh = torch.exp(loc[:, 2:] * variances[1]) * prior[:, 2:]
                    boxes.append(torch.cat([xy - wh / 2, xy + wh / 2], 1))
                    score.append(ocls[hindex, windex, 1])

                s = self.box_sizes[i]
                for j, ar in enumerate(self.aspect_ratios[i]):
                    if ocls[hindex, windex, 2 + j * 2] > score_thresh:
                        loc = oreg[hindex, windex, 2 + j * 2, :].unsqueeze(0)
                        prior = torch.Tensor([cx, cy, s * math.sqrt(ar), s / math.sqrt(ar)]).unsqueeze(0)
                        variances = (1, 1)

                        xy = loc[:, :2] * variances[0] * prior[:, 2:] + prior[:, :2]
                        wh = torch.exp(loc[:, 2:] * variances[1]) * prior[:, 2:]
                        boxes.append(torch.cat([xy - wh / 2, xy + wh / 2], 1))
                        score.append(ocls[hindex, windex, 2 + j * 2])

                    if ocls[hindex, windex, 2 + j * 2 + 1] > score_thresh:
                        loc = oreg[hindex, windex, 2 + j * 2 + 1, :].unsqueeze(0)
                        prior = torch.Tensor([cx, cy, s / math.sqrt(ar), s * math.sqrt(ar)]).unsqueeze(0)
                        variances = (1, 1)

                        xy = loc[:, :2] * variances[0] * prior[:, 2:] + prior[:, :2]
                        wh = torch.exp(loc[:, 2:] * variances[1]) * prior[:, 2:]
                        boxes.append(torch.cat([xy - wh / 2, xy + wh / 2], 1))
                        score.append(ocls[hindex, windex, 2 + j * 2 + 1])

        if not boxes:  # nothing exceeded score_thresh
            return None, None, None
        box = torch.cat(boxes, 0)

        score = torch.tensor([float(s) for s in score])  # list of 0-dim tensors -> 1-D tensor

        boxes = []
        labels = []
        scores = []

        keep = box_nms(box, score, nms_thresh)
        boxes.append(box[keep])
        labels.append(torch.zeros(len(box[keep]), dtype=torch.long))  # single foreground class
        scores.append(score[keep])

        boxes = torch.cat(boxes, 0)
        labels = torch.cat(labels, 0)
        scores = torch.cat(scores, 0)

        return boxes, labels, scores
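
The per-cell loop above recomputes each (cx, cy, s, s) prior one location at a time. Purely for illustration, the same grid of priors for one feature level can be built in a single vectorized step (a sketch with assumed arguments, not part of the original code):

import torch

def make_priors_for_level(fm_h, fm_w, step, box_size):
    """Vectorized (cx, cy, s, s) priors for one fm_h x fm_w feature map."""
    xs = (torch.arange(fm_w, dtype=torch.float32) + 0.5) * step   # cx per column
    ys = (torch.arange(fm_h, dtype=torch.float32) + 0.5) * step   # cy per row
    cx = xs.repeat(fm_h)                                          # row-major grid
    cy = ys.repeat_interleave(fm_w)
    s = torch.full_like(cx, float(box_size))
    return torch.stack([cx, cy, s, s], dim=1)                     # [fm_h * fm_w, 4]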