Example #1
    def init(self,
             time=5,
             batchsize=3,
             height=256,
             width=256,
             allow_low_quality_matches=False,
             bg_iou_threshold=0.4,
             fg_iou_threshold=0.5):
        self.batchsize = batchsize
        self.time = time
        self.height, self.width = height, width
        self.bg_iou_threshold = bg_iou_threshold
        self.fg_iou_threshold = fg_iou_threshold
        self.allow_low_quality_matches = allow_low_quality_matches
        self.num_classes = 3
        self.box_generators = [
            Animation(self.height, self.width, 3, max_objects=3, max_classes=3)
            for i in range(self.batchsize)
        ]
        self.box_coder = Anchors(
            allow_low_quality_matches=allow_low_quality_matches,
            bg_iou_threshold=bg_iou_threshold,
            fg_iou_threshold=fg_iou_threshold)
        self.fmaps = []
        for i in range(self.box_coder.num_levels):
            self.fmaps += [
                torch.zeros((self.batchsize, 1, self.height >>
                             (3 + i), self.width >> (3 + i)))
            ]
        targets = [[
            torch.from_numpy(self.box_generators[i].run())
            for i in range(self.batchsize)
        ] for t in range(self.time)]
        return targets
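A quick sketch of what this helper returns (an assumption based on its usage in Example #6 below, where Animation.run() appears to yield a (num_boxes, 5) array of [x1, y1, x2, y2, label] rows):

suite = TestAnchors()                 # the suite this helper belongs to (Example #6)
targets = suite.init(time=2, batchsize=3)
assert len(targets) == 2              # time-major: one list per time step
assert len(targets[0]) == 3           # one (num_boxes, 5) tensor per batch element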
Example #2
    def pytestcase_all_gt_should_be_matched_even_low_iou(self):
        """
        Boxes with small iou should be matched
        :return:
        """
        box_coder = Anchors(allow_low_quality_matches=True)
        anchors_xyxy = torch.tensor(
            [[25, 25, 250, 250], [20, 20, 50, 50], [3, 3, 4, 4]],
            dtype=torch.float32)
        anchors = box.change_box_order(anchors_xyxy, 'xyxy2xywh')

        targets = torch.tensor([[120, 120, 250, 250, 1], [20, 20, 22, 22, 2]],
                               dtype=torch.float32)
        targets = [[targets]]

        _, cls_targets = box_coder.encode(anchors, anchors_xyxy, targets)
        # both foreground labels (1 and 2) plus background (0) -> 3 unique values
        assert len(torch.unique(cls_targets)) == 3
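For contrast, a hedged sketch (not in the source): with allow_low_quality_matches=False, the low-IoU ground truth above should no longer be forced onto its best anchor, so fewer unique labels are expected (names reused from the test above):

strict_coder = Anchors(allow_low_quality_matches=False)
_, cls_strict = strict_coder.encode(anchors, anchors_xyxy, targets)
assert len(torch.unique(cls_strict)) < 3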
Example #3
    def __init__(self,
                 feature_extractor=FPN,
                 rpn=BoxHead,
                 in_channels=3,
                 num_classes=2,
                 act='sigmoid',
                 ratios=[0.5, 1.0, 2.0],
                 scales=[1.0, 2**(1. / 3), 2**(2. / 3)],
                 nlayers=0,
                 loss='_focal_loss'):
        super(SingleStageDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.in_channels = in_channels

        self.feature_extractor = feature_extractor(in_channels)

        self.box_coder = Anchors(num_levels=self.feature_extractor.levels,
                                 scales=scales,
                                 ratios=ratios,
                                 allow_low_quality_matches=False,
                                 variances=[1.0, 1.0],
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)

        self.num_anchors = self.box_coder.num_anchors
        self.act = act

        if rpn == BoxHead:
            self.rpn = BoxHead(self.feature_extractor.cout,
                               self.box_coder.num_anchors,
                               self.num_classes + self.label_offset, act,
                               nlayers)
        elif rpn == SSDHead:
            self.rpn = SSDHead(self.feature_extractor.out_channel_list,
                               self.box_coder.num_anchors,
                               self.num_classes + self.label_offset, act)
        else:
            raise NotImplementedError()

        self.criterion = DetectionLoss(act + loss)
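Instantiation sketch (argument names as above; note that act + loss composes the DetectionLoss key, e.g. 'sigmoid' + '_focal_loss' -> 'sigmoid_focal_loss'):

net = SingleStageDetector(feature_extractor=FPN, rpn=BoxHead, in_channels=3,
                          num_classes=2, act='sigmoid', loss='_focal_loss')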
Example #4
    def __init__(self,
                 feature_extractor=FPN,
                 rpn=BoxHead,
                 num_classes=2,
                 cin=2,
                 act='sigmoid',
                 ratios=[1.0],
                 scales=[1.0, 1.5]):
        super(TwoStageDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.cin = cin

        self.feature_extractor = feature_extractor(cin)

        self.box_coder = Anchors(pyramid_levels=[
            i for i in range(3, 3 + self.feature_extractor.levels)
        ],
                                 scales=scales,
                                 ratios=ratios,
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)

        self.num_anchors = self.box_coder.num_anchors
        self.act = act

        self.first_stage = rpn(self.feature_extractor.cout,
                               self.box_coder.num_anchors,
                               1,
                               'sigmoid',
                               n_layers=0)

        feat_names = [
            'feat' + str(i) for i in range(self.feature_extractor.levels)
        ]
        self.roi_pool = pool.MultiScaleRoIAlign(feat_names, 5, 2)
        self.second_stage = FCHead(self.feature_extractor.cout * 5 * 5,
                                   self.num_classes + self.label_offset, act)
        self.criterion = DetectionLoss('sigmoid_focal_loss')
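Instantiation sketch (names as above; the 5 passed to MultiScaleRoIAlign matches the cout * 5 * 5 input size of FCHead):

net = TwoStageDetector(feature_extractor=FPN, num_classes=2, cin=2)
print(net.num_anchors)    # presumably len(scales) * len(ratios) = 2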
Example #5
    def __init__(self,
                 feature_extractor=MobileNetFPN,
                 rpn=BoxHead,
                 num_classes=2,
                 cin=2,
                 act='sigmoid'):
        super(RefinedDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.cin = cin

        self.feature_extractor = feature_extractor(cin)

        self.box_coder = Anchors(pyramid_levels=[
            i for i in range(3, 3 + self.feature_extractor.levels)
        ],
                                 scales=[1.0, 2**(1. / 3), 2**(2. / 3)],
                                 ratios=[0.5, 1.0, 2.0],
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)

        self.num_anchors = self.box_coder.num_anchors
        self.act = act

        self.rpn = rpn(self.feature_extractor.cout, self.box_coder.num_anchors,
                       self.num_classes + self.label_offset, act)

        # refinement
        self.feature_extractor2 = FeaturePyramidNetwork(
            [self.feature_extractor.cout] * self.feature_extractor.levels,
            self.feature_extractor.cout)
        self.rpn2 = rpn(self.feature_extractor.cout,
                        self.box_coder.num_anchors,
                        self.num_classes + self.label_offset, act)

        self.criterion = DetectionLoss('sigmoid_focal_loss')
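Instantiation sketch (an assumption: with 3 scales and 3 ratios, num_anchors should come out to 9 per location):

net = RefinedDetector(feature_extractor=MobileNetFPN, num_classes=2, cin=2)
print(net.num_anchors)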
Example #6
class TestAnchors(object):
    """
    test of box coder class.
    """
    def init(self,
             time=5,
             batchsize=3,
             height=256,
             width=256,
             allow_low_quality_matches=False,
             bg_iou_threshold=0.4,
             fg_iou_threshold=0.5):
        self.batchsize = batchsize
        self.time = time
        self.height, self.width = height, width
        self.bg_iou_threshold = bg_iou_threshold
        self.fg_iou_threshold = fg_iou_threshold
        self.allow_low_quality_matches = allow_low_quality_matches
        self.num_classes = 3
        self.box_generators = [
            Animation(self.height, self.width, 3, max_objects=3, max_classes=3)
            for i in range(self.batchsize)
        ]
        self.box_coder = Anchors(
            allow_low_quality_matches=allow_low_quality_matches,
            bg_iou_threshold=bg_iou_threshold,
            fg_iou_threshold=fg_iou_threshold)
        self.fmaps = []
        for i in range(self.box_coder.num_levels):
            self.fmaps += [
                torch.zeros((self.batchsize, 1, self.height >>
                             (3 + i), self.width >> (3 + i)))
            ]
        targets = [[
            torch.from_numpy(self.box_generators[i].run())
            for i in range(self.batchsize)
        ] for t in range(self.time)]
        return targets

    def assert_equal(self, x, y):
        assert (x - y).abs().max().item() == 0

    def abs_diff(self, x, y):
        diff = (x - y).abs()
        return diff, diff.max().item()

    def cat_diff(self, x, y):
        u1 = torch.unique(x, return_counts=True)[1]
        u2 = torch.unique(y, return_counts=True)[1]
        return u1 - u2

    def encode_sequential(self,
                          targets,
                          anchors,
                          anchors_xyxy,
                          fg_iou_threshold,
                          bg_iou_threshold,
                          allow_low_quality_matches,
                          remove_dummies=False):
        gt_padded, sizes = box.pack_boxes_list_of_list(targets)
        all_loc, all_cls = [], []
        for t in range(len(gt_padded)):
            gt_boxes, gt_labels = gt_padded[t, :, :4], gt_padded[t, :, -1]
            if remove_dummies:
                max_size = sizes[t]
                gt_boxes, gt_labels = gt_boxes[:max_size], gt_labels[:max_size]
            boxes, cls_t = box.assign_priors(gt_boxes, gt_labels, anchors_xyxy,
                                             fg_iou_threshold,
                                             bg_iou_threshold,
                                             allow_low_quality_matches)
            loc_t = box.bbox_to_deltas(boxes, anchors, [1, 1])
            all_loc.append(loc_t.unsqueeze(0))
            all_cls.append(cls_t.unsqueeze(0).long())
        all_loc = torch.cat(all_loc, dim=0)  # (N, #anchors, 4)
        all_cls = torch.cat(all_cls, dim=0)  # (N, #anchors) class indices
        return all_loc, all_cls

    def pytestcase_batch_box_iou(self):
        targets = self.init(7, 3)
        anchors, anchors_xyxy = self.box_coder(self.fmaps,
                                               (self.height, self.width))
        gt_padded, _ = box.pack_boxes_list_of_list(targets)
        gt_boxes = gt_padded[..., :4]
        batch_iou = box.batch_box_iou(anchors_xyxy, gt_boxes.clone())
        for t in range(len(gt_padded)):
            iou_t = box.box_iou(anchors_xyxy, gt_boxes[t])
            max_abs_diff = self.abs_diff(batch_iou[t], iou_t)[1]
            assert max_abs_diff == 0

    def pytestcase_batched_encode_only_best_quality(self):
        targets = self.init(3, 7, allow_low_quality_matches=False)
        anchors, anchors_xyxy = self.box_coder(self.fmaps,
                                               (self.height, self.width))
        loc_targets, cls_targets = self.box_coder.encode(
            anchors, anchors_xyxy, targets)
        loc_targets2, cls_targets2 = self.encode_sequential(
            targets,
            anchors,
            anchors_xyxy,
            self.box_coder.fg_iou_threshold,
            self.box_coder.bg_iou_threshold,
            self.box_coder.allow_low_quality_matches,
            remove_dummies=True)
        loc_diff, max_loc_diff = self.abs_diff(loc_targets, loc_targets2)
        cls_diff, max_cls_diff = self.abs_diff(cls_targets, cls_targets2)
        cat_diff = self.cat_diff(cls_targets, cls_targets2)

        assert max_loc_diff == 0
        assert max_cls_diff == 0
        assert cat_diff.abs().max() == 0

    def pytestcase_batched_encode_allow_low_quality(self):
        targets = self.init(3, 7, allow_low_quality_matches=True)
        anchors, anchors_xyxy = self.box_coder(self.fmaps,
                                               (self.height, self.width))
        loc_targets, cls_targets = self.box_coder.encode(
            anchors, anchors_xyxy, targets)
        loc_targets2, cls_targets2 = self.encode_sequential(
            targets,
            anchors,
            anchors_xyxy,
            self.box_coder.fg_iou_threshold,
            self.box_coder.bg_iou_threshold,
            self.box_coder.allow_low_quality_matches,
            remove_dummies=True)
        loc_diff, max_loc_diff = self.abs_diff(loc_targets, loc_targets2)
        cls_diff, max_cls_diff = self.abs_diff(cls_targets, cls_targets2)
        cat_diff = self.cat_diff(cls_targets, cls_targets2)

        assert max_loc_diff == 0
        assert max_cls_diff == 0
        assert cat_diff.abs().max() == 0

    def one_hot(self, y, num_classes):
        y2 = y.unsqueeze(2)
        fg = (y2 > 0).float()
        y_index = (y2 - 1).clamp_(0)
        t = torch.zeros((y.shape[0], y.shape[1], num_classes),
                        dtype=torch.float)
        t.scatter_(2, y_index, fg)
        return t
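    # Worked example for one_hot (a sketch, not part of the original tests):
    #   y = torch.tensor([[0, 1, 3]])  # 0 = background, foreground classes 1..3
    #   one_hot(y, 3) ->
    #     [[[0., 0., 0.],   # background anchor stays all-zero
    #       [1., 0., 0.],   # class 1 -> channel 0
    #       [0., 0., 1.]]]  # class 3 -> channel 2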

    def pytestcase_batched_decode_boxes(self):
        targets = self.init(1,
                            1,
                            allow_low_quality_matches=True,
                            bg_iou_threshold=0.4,
                            fg_iou_threshold=0.5)
        anchors, anchors_xyxy = self.box_coder(self.fmaps,
                                               (self.height, self.width))
        loc_targets, cls_targets = self.box_coder.encode(
            anchors, anchors_xyxy, targets)
        scores = self.one_hot(cls_targets, self.num_classes)

        self.box_coder.decode_func = self.box_coder.decode_per_image

        boxes1 = self.box_coder.decode(anchors, loc_targets.clone(), scores,
                                       self.batchsize, 0.99)

        self.box_coder.decode_func = self.box_coder.batched_decode

        boxes2 = self.box_coder.decode(anchors, loc_targets, scores,
                                       self.batchsize, 0.99)

        for t in range(self.time):
            for x, y in zip(boxes1[t], boxes2[t]):
                b, s, l = x
                b2, s2, l2 = y

                self.assert_equal(b, b2)
                self.assert_equal(s, s2)
                self.assert_equal(l, l2)

    def pytestcase_all_gt_should_be_matched_even_low_iou(self):
        """
        Boxes with small iou should be matched
        :return:
        """
        box_coder = Anchors(allow_low_quality_matches=True)
        anchors_xyxy = torch.tensor(
            [[25, 25, 250, 250], [20, 20, 50, 50], [3, 3, 4, 4]],
            dtype=torch.float32)
        anchors = box.change_box_order(anchors_xyxy, 'xyxy2xywh')

        targets = torch.tensor([[120, 120, 250, 250, 1], [20, 20, 22, 22, 2]],
                               dtype=torch.float32)
        targets = [[targets]]

        _, cls_targets = box_coder.encode(anchors, anchors_xyxy, targets)
        # both foreground labels (1 and 2) plus background (0) -> 3 unique values
        assert len(torch.unique(cls_targets)) == 3
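The pytestcase_* prefix suggests a custom pytest naming convention (e.g. python_functions = pytestcase_* in a pytest.ini, an assumption); a hedged sketch of driving the suite directly:

suite = TestAnchors()
suite.pytestcase_batch_box_iou()
suite.pytestcase_all_gt_should_be_matched_even_low_iou()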
Example #7
class SingleStageDetector(nn.Module):
    def __init__(self,
                 feature_extractor=FPN,
                 rpn=BoxHead,
                 in_channels=3,
                 num_classes=2,
                 act='sigmoid',
                 ratios=[0.5, 1.0, 2.0],
                 scales=[1.0, 2**(1. / 3), 2**(2. / 3)],
                 nlayers=0,
                 loss='_focal_loss'):
        super(SingleStageDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.in_channels = in_channels

        self.feature_extractor = feature_extractor(in_channels)

        self.box_coder = Anchors(num_levels=self.feature_extractor.levels,
                                 scales=scales,
                                 ratios=ratios,
                                 allow_low_quality_matches=False,
                                 variances=[1.0, 1.0],
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)

        self.num_anchors = self.box_coder.num_anchors
        self.act = act

        if rpn == BoxHead:
            self.rpn = BoxHead(self.feature_extractor.cout,
                               self.box_coder.num_anchors,
                               self.num_classes + self.label_offset, act,
                               nlayers)
        elif rpn == SSDHead:
            self.rpn = SSDHead(self.feature_extractor.out_channel_list,
                               self.box_coder.num_anchors,
                               self.num_classes + self.label_offset, act)
        else:
            raise NotImplementedError()

        self.criterion = DetectionLoss(act + loss)

    def reset(self, mask=None):
        self.feature_extractor.reset(mask)

    def forward(self, x):
        xs = self.feature_extractor(x)
        return self.rpn(xs)

    def compute_loss(self, x, targets):
        xs = self.feature_extractor(x)
        loc_preds, cls_preds = self.rpn(xs)

        with torch.no_grad():
            anchors, anchors_xyxy = self.box_coder(xs, x.shape[-2:])
            loc_targets, cls_targets = self.box_coder.encode(
                anchors, anchors_xyxy, targets)

        assert cls_targets.shape[1] == cls_preds.shape[1]

        loc_loss, cls_loss = self.criterion(loc_preds, loc_targets, cls_preds,
                                            cls_targets)

        # att_loss = attention_loss(self.feature_extractor, x, targets, sequence_upsample, box_drawing)
        loss_dict = {'loc': loc_loss, 'cls_loss': cls_loss}

        return loss_dict

    def get_boxes(self, x, score_thresh=0.4):
        xs = self.feature_extractor(x)
        loc_preds, cls_preds = self.rpn(xs)
        cls_preds = self.rpn.probas(cls_preds)
        scores = cls_preds[..., self.label_offset:].contiguous()
        anchors, _ = self.box_coder(xs, x.shape[-2:])
        targets = self.box_coder.decode(anchors,
                                        loc_preds,
                                        scores,
                                        x.size(1),
                                        score_thresh=score_thresh)
        return targets

    @classmethod
    def mnist_vanilla_rnn(cls,
                          in_channels,
                          num_classes,
                          act='softmax',
                          loss='_ohem_loss'):
        return cls(Vanilla,
                   BoxHead,
                   in_channels,
                   num_classes,
                   act,
                   ratios=[1.0],
                   scales=[1.0, 1.5],
                   loss=loss)

    @classmethod
    def mnist_fb_rnn(cls,
                     in_channels,
                     num_classes,
                     act='sigmoid',
                     loss='_focal_loss'):
        return cls(FBN,
                   BoxHead,
                   in_channels,
                   num_classes,
                   act,
                   ratios=[1.0],
                   scales=[1.0, 1.5],
                   loss=loss)

    @classmethod
    def mnist_unet_rnn(cls,
                       in_channels,
                       num_classes,
                       act='sigmoid',
                       loss='_focal_loss'):
        return cls(FPN,
                   BoxHead,
                   in_channels,
                   num_classes,
                   act,
                   ratios=[1.0],
                   scales=[1.0, 1.5],
                   loss=loss)

    @classmethod
    def mobilenet_v2_fpn(cls,
                         in_channels,
                         num_classes,
                         act='sigmoid',
                         loss='_focal_loss',
                         nlayers=3):
        return cls(MobileNetFPN,
                   BoxHead,
                   in_channels,
                   num_classes,
                   act,
                   loss=loss,
                   nlayers=nlayers)

    @classmethod
    def resnet50_fpn(cls,
                     in_channels,
                     num_classes,
                     act='sigmoid',
                     loss='_focal_loss',
                     nlayers=3):
        return cls(ResNet50FPN,
                   BoxHead,
                   in_channels,
                   num_classes,
                   act,
                   loss=loss,
                   nlayers=nlayers)

    @classmethod
    def resnet50_ssd(cls,
                     in_channels,
                     num_classes,
                     act='sigmoid',
                     loss='_focal_loss',
                     nlayers=0):
        return cls(ResNet50SSD,
                   SSDHead,
                   in_channels,
                   num_classes,
                   act=act,
                   loss=loss,
                   nlayers=nlayers)
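End-to-end sketch (shapes are assumptions; compute_loss and get_boxes above read inputs as (T, N, C, H, W) sequences, with x.size(1) as the batch size):

net = SingleStageDetector.mnist_unet_rnn(in_channels=3, num_classes=2)
x = torch.randn(1, 2, 3, 256, 256)            # T=1, N=2
loc_preds, cls_preds = net(x)                 # raw head outputs
boxes = net.get_boxes(x, score_thresh=0.4)    # decoded (boxes, scores, labels)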
Example #8
class TwoStageDetector(nn.Module):
    def __init__(self,
                 feature_extractor=FPN,
                 rpn=BoxHead,
                 num_classes=2,
                 cin=2,
                 act='sigmoid',
                 ratios=[1.0],
                 scales=[1.0, 1.5]):
        super(TwoStageDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.cin = cin

        self.feature_extractor = feature_extractor(cin)

        self.box_coder = Anchors(pyramid_levels=[
            i for i in range(3, 3 + self.feature_extractor.levels)
        ],
                                 scales=scales,
                                 ratios=ratios,
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)

        self.num_anchors = self.box_coder.num_anchors
        self.act = act

        self.first_stage = rpn(self.feature_extractor.cout,
                               self.box_coder.num_anchors,
                               1,
                               'sigmoid',
                               n_layers=0)

        feat_names = [
            'feat' + str(i) for i in range(self.feature_extractor.levels)
        ]
        self.roi_pool = pool.MultiScaleRoIAlign(feat_names, 5, 2)
        self.second_stage = FCHead(self.feature_extractor.cout * 5 * 5,
                                   self.num_classes + self.label_offset, act)
        self.criterion = DetectionLoss('sigmoid_focal_loss')

    def reset(self):
        self.feature_extractor.reset()

    def forward(self, x, score_thresh=0.4):
        batchsize = x.size(1)
        xs = self.feature_extractor(x)
        loc_preds, cls_preds = self.first_stage(xs)
        anchors, anchors_xyxy = self.box_coder(xs)
        proposals = self.box_coder.decode(anchors,
                                          loc_preds,
                                          cls_preds.sigmoid(),
                                          batchsize,
                                          score_thresh=score_thresh)
        image_sizes = [x.shape[-2:]] * x.size(0) * x.size(1)
        sources = {'feat' + str(i): item for i, item in enumerate(xs)}
        rois, rois_xyxy, sizes, batch_index = self.gather_boxes(proposals)
        if len(rois) > 0:
            # MultiScaleRoIAlign expects boxes in xyxy order
            out = self.roi_pool(sources, rois_xyxy, image_sizes)
            rois, rois_xyxy = torch.cat(rois), torch.cat(rois_xyxy)
            loc_preds2, cls_preds2 = self.second_stage(out)
        else:
            loc_preds2, cls_preds2, rois, rois_xyxy, batch_index = None, None, None, None, None

        out_dic = Struct(
            **{
                'first_stage':
                Struct(
                    **{
                        'loc': loc_preds,
                        'cls': cls_preds,
                        'proposals': proposals,
                        'rois': rois,
                        'rois_xyxy': rois_xyxy,
                        'sizes': sizes,
                        'idxs': batch_index
                    }),
                'second_stage':
                Struct(**{
                    'loc': loc_preds2,
                    'cls': cls_preds2
                }),
                'anchors':
                anchors,
                'anchors_xyxy':
                anchors_xyxy
            })
        return out_dic

    def gather_boxes(self, proposals):
        # expects, per time step, a per-image list of (boxes, scores, labels)
        # triples, where boxes is an (N, 4) tensor in xyxy order
        idxs = []
        sizes = []
        rois = []
        rois_xyxy = []
        stride = len(proposals)
        for t in range(len(proposals)):
            for i in range(len(proposals[t])):
                boxes, _, _ = proposals[t][i]
                num = len(boxes) if boxes is not None else 0
                sizes += [num]

                if num > 0:
                    boxes = boxes.detach()
                    rois_xyxy += [boxes]
                    rois += [box.change_box_order(boxes, 'xyxy2xywh')]
                    idxs += [t * stride + i] * num

        device = rois[0].device if len(rois) else torch.device('cpu')
        idxs = torch.LongTensor(idxs).to(device)
        return rois, rois_xyxy, sizes, idxs

    def compute_loss(self, x, targets):
        out = self(x)

        #first stage loss
        with torch.no_grad():
            loc_targets, cls_targets = self.box_coder.encode(
                out.anchors, out.anchors_xyxy, binarize_targets(targets))
        loc_loss, cls_loss = self.criterion(out.first_stage.loc, loc_targets,
                                            out.first_stage.cls, cls_targets)
        loss_dict = {'loc1': loc_loss, 'cls1': cls_loss}

        #second stage loss
        if out.second_stage.loc is not None:
            with torch.no_grad():
                loc_targets2, cls_targets2 = self.box_coder.encode(
                    out.first_stage.rois, out.first_stage.rois_xyxy, targets)
            loc_loss, cls_loss = self.criterion(out.second_stage.loc,
                                                loc_targets2,
                                                out.second_stage.cls,
                                                cls_targets2)
            loss_dict.update({'loc2': loc_loss, 'cls2': cls_loss})

        return loss_dict

    def get_boxes(self, x, score_thresh=0.4):
        batchsize = x.size(1)
        out = self(x)

        scores, idxs = out.second_stage.cls.sigmoid().max(dim=1)
        idxs = out.first_stage.idxs * self.num_classes + idxs
        box_preds = box.deltas_to_bbox(out.second_stage.loc,
                                       out.first_stage.rois, [1, 1])
        boxes, scores, labels, batch_index = self.box_coder.batched_decode_with_idxs(
            box_preds, scores, idxs, self.num_anchors, self.num_classes,
            batchsize, 0.5, 0.5)
        targets = self.box_coder.flatten_box_list_to_list_of_list(
            boxes, scores, labels, batch_index, batchsize)

        return targets
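Usage sketch (hypothetical shapes; forward returns a Struct carrying both stages' outputs):

net = TwoStageDetector(num_classes=2, cin=2)
x = torch.randn(1, 2, 2, 256, 256)            # (T, N, C, H, W)
out = net(x, score_thresh=0.4)
print(out.first_stage.cls.shape)              # first-stage class logits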
Example #9
class RefinedDetector(nn.Module):
    def __init__(self,
                 feature_extractor=MobileNetFPN,
                 rpn=BoxHead,
                 num_classes=2,
                 cin=2,
                 act='sigmoid'):
        super(RefinedDetector, self).__init__()
        self.label_offset = 1 * (act == 'softmax')
        self.num_classes = num_classes
        self.cin = cin

        self.feature_extractor = feature_extractor(cin)

        self.box_coder = Anchors(pyramid_levels=[
            i for i in range(3, 3 + self.feature_extractor.levels)
        ],
                                 scales=[1.0, 2**(1. / 3), 2**(2. / 3)],
                                 ratios=[0.5, 1.0, 2.0],
                                 fg_iou_threshold=0.5,
                                 bg_iou_threshold=0.4)

        self.num_anchors = self.box_coder.num_anchors
        self.act = act

        self.rpn = rpn(self.feature_extractor.cout, self.box_coder.num_anchors,
                       self.num_classes + self.label_offset, act)

        # refinement
        self.feature_extractor2 = FeaturePyramidNetwork(
            [self.feature_extractor.cout] * self.feature_extractor.levels,
            self.feature_extractor.cout)
        self.rpn2 = rpn(self.feature_extractor.cout,
                        self.box_coder.num_anchors,
                        self.num_classes + self.label_offset, act)

        self.criterion = DetectionLoss('sigmoid_focal_loss')

    def reset(self):
        self.feature_extractor.reset()

    def forward(self, x):
        xs = self.feature_extractor(x)
        ys = self.feature_extractor2(xs)
        return self.rpn(xs), self.rpn2(ys)

    def compute_loss(self, x, targets):
        xs = self.feature_extractor(x)
        ys = self.feature_extractor2(xs)
        loc_preds, cls_preds = self.rpn(xs)
        loc_preds2, cls_preds2 = self.rpn2(ys)

        with torch.no_grad():
            anchors, anchors_xyxy = self.box_coder(xs)
            loc_targets, cls_targets = self.box_coder.encode(
                anchors, anchors_xyxy, targets)
            # refined anchors: first-stage deltas applied to the base anchors
            anchors2 = box.deltas_to_bbox(loc_preds, anchors)
            anchors2xyxy = box.change_box_order(anchors2, 'xywh2xyxy')
            loc_targets2, cls_targets2 = self.box_coder.encode_with_anchors(
                anchors2, anchors2xyxy, targets)
            #cls_targets *= cls_preds?

        assert cls_targets.shape[1] == cls_preds.shape[1]
        loc_loss, cls_loss = self.criterion(loc_preds, loc_targets, cls_preds,
                                            cls_targets)
        loc_loss2, cls_loss2 = self.criterion(loc_preds2, loc_targets2,
                                              cls_preds2, cls_targets2)
        loss_dict = {
            'loc': loc_loss,
            'cls_loss': cls_loss,
            'loc2': loc_loss2,
            'cls_loss2': cls_loss2
        }

        return loss_dict

    def get_refined_anchors(self, xs, loc_preds):
        anchors, _ = self.box_coder(xs)
        anchors2 = box.deltas_to_bbox(loc_preds, anchors)
        anchors2xyxy = box.change_box_order(anchors2, 'xywh2xyxy')
        return anchors2, anchors2xyxy

    def get_boxes(self, x, score_thresh=0.4):
        xs = self.feature_extractor(x)
        ys = self.feature_extractor2(xs)
        loc_preds, cls_preds = self.rpn(xs)
        loc_preds2, cls_preds2 = self.rpn2(ys)

        anchors, anchorsxyxy = self.get_refined_anchors(xs, loc_preds)
        scores = cls_preds2[..., self.label_offset:].contiguous()
        targets = self.box_coder.decode_with_anchors(anchors,
                                                     anchorsxyxy,
                                                     loc_preds2,
                                                     scores,
                                                     x.size(1),
                                                     score_thresh=score_thresh)
        return targets
Example #10
    def __init__(self,
                 feature_extractor=FPN,
                 num_classes=2,
                 cin=2,
                 height=300,
                 width=300,
                 act='sigmoid',
                 shared=True):
        super(SSD, self).__init__()
        self.num_classes = num_classes
        self.height, self.width = height, width
        self.cin = cin

        self.extractor = feature_extractor(cin)

        x = torch.randn(1, 1, self.cin, self.height, self.width)
        sources = self.extractor(x)

        if USE_ANCHOR_MODULE:
            self.box_coder = Anchors(
                pyramid_levels=[i for i in range(3, 3 + len(sources))],
                scales=[1.0, 1.5],
                ratios=[1],
                label_offset=1,
                fg_iou_threshold=0.5,
                bg_iou_threshold=0.4)

            self.num_anchors = self.box_coder.num_anchors
        else:
            self.fm_sizes, self.steps, self.box_sizes = get_box_params_fixed_size(
                sources, height, width)
            self.ary = float(width) / height
            self.aspect_ratios = [1]
            self.scales = [1, 1.5]
            # self.num_anchors = 2 * len(self.aspect_ratios) + 2
            self.num_anchors = len(self.aspect_ratios) * len(self.scales)
            self.box_coder = SSDBoxCoder(self, 0.7, 0.4)

        self.aspect_ratios = []
        self.in_channels = [item.size(1) for item in sources]

        self.shared = shared
        self.act = act

        self.use_embedding_loss = False

        if self.shared:
            self.embedding_dims = 32

            self.loc_head = self._make_head(self.in_channels[0],
                                            self.num_anchors * 4)
            self.cls_head = self._make_head(
                self.in_channels[0], self.num_anchors * self.num_classes)
            if self.use_embedding_loss:
                self.emb_head = self._make_head(
                    self.in_channels[0],
                    self.num_anchors * self.embedding_dims)

            torch.nn.init.normal_(self.loc_head[-1].weight, std=0.01)
            torch.nn.init.constant_(self.loc_head[-1].bias, 0)

            if self.act == 'softmax':
                self.softmax_init(self.cls_head[-1])
            else:
                self.sigmoid_init(self.cls_head[-1])

        else:
            self.cls_layers = nn.ModuleList()
            self.reg_layers = nn.ModuleList()

            for i in range(len(self.in_channels)):
                self.reg_layers += [
                    nn.Conv2d(self.in_channels[i],
                              self.num_anchors * 4,
                              kernel_size=3,
                              padding=1,
                              stride=1)
                ]
                self.cls_layers += [
                    nn.Conv2d(self.in_channels[i],
                              self.num_anchors * self.num_classes,
                              kernel_size=3,
                              padding=1,
                              stride=1)
                ]

                for l in self.reg_layers:
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

                # Init for strong bias toward bg class for focal loss
                if self.act == 'softmax':
                    self.softmax_init(self.cls_layers[-1])
                else:
                    self.sigmoid_init(self.cls_layers[-1])

        self.criterion = SSDLoss(num_classes=num_classes,
                                 mode='focal',
                                 use_sigmoid=self.act == 'sigmoid',
                                 use_iou=False)

        self._forward = [self._forward_unshared, self._forward_shared][shared]
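_make_head is called above but not included in this snippet. A plausible minimal sketch of such a shared head (an assumption, not the project's actual code; the code above indexes head[-1], so an nn.Sequential ending in the prediction conv fits):

def _make_head(self, in_planes, out_planes):
    layers = []
    for _ in range(4):
        layers += [nn.Conv2d(in_planes, in_planes, 3, padding=1),
                   nn.ReLU(inplace=True)]
    layers += [nn.Conv2d(in_planes, out_planes, 3, padding=1)]
    return nn.Sequential(*layers)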
Example #11
    def __init__(self, anno_lines, input_shape, num_classes, batch_size):
        self.anno_lines = anno_lines
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.Anchor = Anchors()
Example #12
class DataGenerator:
    def __init__(self, anno_lines, input_shape, num_classes, batch_size):
        self.anno_lines = anno_lines
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.Anchor = Anchors()

    def data_generate(self):
        """
        Data generator for training.

        Yields:
            [image_data, [true_box, true_class]], dummy_target
            image_data:   stacked tf.Tensor of images
            true_box:     anchor-matched box regression targets
            true_class:   anchor-matched classification targets
            dummy_target: tf.zeros placeholder for the Keras training loop
        """
        anchors = self.Anchor.anchors_generator(self.input_shape)

        n = len(self.anno_lines)
        i = 0

        while True:
            image_data = []
            true_box = []
            true_class = []
            for j in range(self.batch_size):
                if i == 0:
                    np.random.shuffle(self.anno_lines)
                image, boxes = get_random_data(self.anno_lines[i],
                                               self.input_shape)
                results = self.Anchor.anchors_target_total(
                    anchors, boxes, self.num_classes, self.input_shape)
                image_data.append(image)
                true_box.append(results[0])
                true_class.append(results[1])

                i = (i + 1) % n

            image_data = tf.stack(image_data, axis=0)
            true_box = tf.stack(true_box, axis=0)
            true_class = tf.stack(true_class, axis=0)

            yield [image_data,
                   [true_box, true_class]], tf.zeros(shape=(self.batch_size, ))

    # #########################################
    # this function has been deprecated
    # #########################################
    def data_generate_other(self):
        anchors = self.Anchor.anchors_generator(self.input_shape)

        n = len(self.anno_lines)
        i = 0

        while True:
            targets_3 = []
            targets_4 = []
            targets_5 = []
            targets_6 = []
            targets_7 = []
            image_data = []

            for j in range(self.batch_size):
                if i == 0:
                    np.random.shuffle(self.anno_lines)
                image, boxes = get_random_data(self.anno_lines[i],
                                               self.input_shape)
                results = self.Anchor.anchors_target_total(
                    anchors, boxes, self.num_classes, self.input_shape)
                image_data.append(image)
                targets_3.append(results[0])
                targets_4.append(results[1])
                targets_5.append(results[2])
                targets_6.append(results[3])
                targets_7.append(results[4])

                i = (i + 1) % n

            image_data = tf.stack(image_data, axis=0)

            yield [
                image_data,
                [targets_3, targets_4, targets_5, targets_6, targets_7]
            ], tf.zeros(shape=(2, ))
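Usage sketch (hypothetical annotation file and model; tf.keras is assumed from the tf.stack calls above):

with open('train_annotations.txt') as f:
    lines = f.readlines()
gen = DataGenerator(lines, input_shape=(512, 512), num_classes=20, batch_size=4)
model.fit(gen.data_generate(), steps_per_epoch=len(lines) // gen.batch_size,
          epochs=10)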