# Example #1 (extraction artifact; stray vote-count line removed)
    def __init__(self, num_classes, **kwargs):
        """Assemble the MobileNetV2 backbone plus SSD prediction heads.

        Args:
            num_classes: number of foreground classes; one extra
                background ("dummy") class is appended internally.
        """
        super(MobilenetSSD512, self).__init__()
        backbone = MobileNetV2()

        # Re-register the eight MobileNetV2 stages on this module, in the
        # same order, so their parameters are tracked by this model.
        for stage in range(8):
            name = 'layer%d' % stage
            setattr(self, name, getattr(backbone, name))

        # Strided 3x3 convs that keep shrinking the deepest feature map.
        self.conv6 = nn.Conv2d(320, 64, kernel_size=3, stride=2, padding=1)
        self.conv7 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.conv8 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.conv9 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)

        # Entry point of the top-down pathway.
        self.toplayer = nn.Conv2d(320, 64, kernel_size=1, stride=1, padding=0)

        # 1x1 lateral projections onto the common 64-channel width.
        self.latlayer1 = nn.Conv2d(96, 64, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0)

        # 3x3 smoothing convs applied after the top-down merges.
        self.smooth1 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)

        self.num_classes = num_classes + 1  # Dummy class
        self.loc_head = self._make_head(self.num_anchors * 4)
        self.cls_head = self._make_head(self.num_anchors * self.num_classes)
        self.box_coder = FPNSSDBoxCoder()
    def __init__(self, num_classes, pretrained=True, **kwargs):
        """Build the FPN50-based SSD512 model.

        Args:
            num_classes: foreground class count; one extra background
                ("dummy") class is appended internally.
            pretrained: when True, warm-start the FPN trunk from
                ImageNet ResNet-50 weights.
        """
        super(FPNSSD512, self).__init__()
        self.fpn = FPN50()
        self.num_classes = num_classes + 1  # extra background class
        self.loc_head = self._make_head(self.num_anchors * 4)
        self.cls_head = self._make_head(self.num_anchors * self.num_classes)
        self.box_coder = FPNSSDBoxCoder()

        # strict=False: the ResNet state dict covers only the shared
        # trunk, not the FPN-specific layers.
        backbone_weights = resnet50(pretrained=pretrained).state_dict()
        self.fpn.load_state_dict(backbone_weights, strict=False)
class BBoxMeanAP:
    """Accumulates a per-image mean-average-precision score for SSD output.

    Each `update` call decodes a batch of raw SSD predictions and targets
    with the box coder and appends one mAP score per image that has ground
    truth; `value` averages the accumulated scores.
    """

    def __init__(self, threshold=0.5):
        self.scores_per_image = []
        # NOTE(review): `threshold` is stored but never read in this class;
        # presumably an IoU cutoff consumed elsewhere — verify.
        self.threshold = threshold
        self.box_coder = FPNSSDBoxCoder()

    def reset(self):
        """Forget all accumulated per-image scores."""
        self.scores_per_image = []

    def update(self, y_pred: Tensor, y_true: Tensor):
        """Decode one batch and append a score for each image with GT."""
        pred_locs = y_pred[SSD_BBOXES_KEY].detach().cpu()
        true_locs = y_true[SSD_BBOXES_KEY].detach().cpu()
        pred_probs = y_pred[SSD_LABELS_KEY].detach().cpu().softmax(dim=2)
        true_onehot = one_hot(y_true[SSD_LABELS_KEY].detach().cpu(),
                              num_classes=pred_probs.size(2))

        samples = zip(pred_locs, pred_probs, true_locs, true_onehot)
        for loc_p, cls_p, loc_t, cls_t in samples:
            boxes_p, _, conf_p = self.box_coder.decode(loc_p, cls_p)
            boxes_t, _, _ = self.box_coder.decode(loc_t, cls_t)

            # Convert xyxy -> xywh before scoring.
            boxes_t = to_numpy(change_box_order(boxes_t, 'xyxy2xywh'))
            boxes_p = to_numpy(change_box_order(boxes_p, 'xyxy2xywh'))
            conf_p = to_numpy(conf_p)

            # Images without ground truth contribute nothing.
            if len(boxes_t) == 0:
                continue

            if len(boxes_p) == 0:
                score = 0
            else:
                score = map_iou(boxes_t, boxes_p, conf_p)
            self.scores_per_image.append(score)

    def __str__(self):
        return format(self.value(), '.4f')

    def value(self):
        """Mean of the accumulated per-image scores (0 when empty)."""
        if not self.scores_per_image:
            return 0
        return np.mean(self.scores_per_image)

    def log_to_tensorboard(self, saver: SummaryWriter, prefix, step):
        """Write the mean value and the score histogram, if any scores."""
        if not self.scores_per_image:
            return
        saver.add_scalar(prefix + '/value', self.value(), step)
        saver.add_histogram(prefix + '/histogram',
                            np.array(self.scores_per_image), step)
# Example #4 (extraction artifact; stray vote-count line removed)
def test_sdd_box_coder():
    """Round-trip a handful of boxes through FPNSSDBoxCoder encode/decode."""
    coder = FPNSSDBoxCoder()

    sample_boxes = torch.tensor(
        [[20, 40, 80, 100],
         [200, 4, 300, 200],
         [100, 100, 160, 200],
         [50, 90, 175, 300]],
        dtype=torch.float32)
    # NOTE(review): labels are passed as float32; confirm encode() expects
    # float rather than integer class indices.
    sample_labels = torch.tensor([0, 0, 0, 0], dtype=torch.float32)

    loc_targets, cls_targets = coder.encode(sample_boxes, sample_labels)
    dec_boxes, dec_labels, dec_scores = coder.decode(loc_targets, cls_targets)
    print(dec_boxes, dec_labels, dec_scores)
# Example #5 (extraction artifact; stray vote-count line removed)
def test_anchors_count():
    """Print the total anchor-box counts of the SSD and RSSD coders."""
    print('Total number of anchor boxes SSD512',
          len(FPNSSDBoxCoder().anchor_boxes))

    # Same report for the rotated-SSD coder at two image sizes.
    for size in (512, 768):
        coder = RSSDBoxCoder(size, size)
        print('Total number of anchor boxes in RSSD%d' % size,
              len(coder.anchor_boxes))
# Example #6 (extraction artifact; stray vote-count line removed)
def test_draw_rsdd_bboxes():
    """Visualize the RSSD anchor shapes, one window per feature-map level.

    Also prints the anchor counts of the plain SSD coder and the rotated
    coder. Requires a display; blocks on cv2.waitKey.
    """
    box_coder = RSSDBoxCoder(768, 768)
    anchors = box_coder._get_anchor_wht()

    # BUG FIX: the SSD count was computed into `n` but the print showed
    # len(box_coder.anchor_boxes) — i.e. the RSSD count — under the SSD
    # label. Print the value that was actually computed.
    n = len(FPNSSDBoxCoder().anchor_boxes)
    print('Total number of anchor boxes SSD', n)

    print('Total number of anchor boxes in RSSD', len(box_coder.anchor_boxes))

    for i, wht_fm in enumerate(anchors):
        # One blank canvas per feature-map level.
        image = np.zeros((box_coder.image_height, box_coder.image_width, 3),
                         dtype=np.uint8)

        for wht in wht_fm:
            # Rotated box centred on the image; assumes wht is
            # (width, height, theta) — TODO confirm against RSSDBoxCoder.
            rbox = [
                box_coder.image_height // 2, box_coder.image_width // 2,
                wht[0], wht[1], wht[2]
            ]
            visualize_rbbox(image, rbox, (i * 28, 255, 0), thickness=1)

        cv2.imshow("Image" + str(i), image)
    cv2.waitKey(-1)
# Example #7 (extraction artifact; stray vote-count line removed)
def test_ssd_synthetic():
    """Visual round-trip test of FPNSSDBoxCoder on a synthetic mask.

    Paints one labelled rotated rectangle into a 512x512 instance mask,
    derives axis-aligned bboxes from it, encodes/decodes them through the
    box coder, and displays decoded boxes (magenta), ground truth (green)
    and matched anchors (white). Requires a display; blocks on waitKey.
    """
    label_image = np.zeros((512, 512), dtype=np.uint8)

    # Alternative synthetic shapes kept for manual experimentation;
    # uncomment to add more instances to the mask.
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((100, 100), (100, 20), 0)), 1).astype(int), (1, 1, 1))
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((200, 100), (100, 20), 45)), 1).astype(int), (2, 2, 2))
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((100, 200), (100, 20), 90)), 1).astype(int), (3, 3, 3))
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((200, 200), (100, 20), 135)), 1).astype(int), (4, 4, 4))

    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((100 + 200, 100), (20, 100), 0)), 1).astype(int), (5, 5, 5))
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((200 + 200, 100), (20, 100), 45)), 1).astype(int), (6, 6, 6))
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((100 + 200, 200), (20, 100), 90)), 1).astype(int), (7, 7, 7))
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((200 + 200, 200), (20, 100), 135)), 1).astype(int), (8, 8, 8))

    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((100, 100 + 200), (100, 20), 17)), 1).astype(int), (9, 9, 9))
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((200, 100 + 200), (100, 20), 49)), 1).astype(int), (10, 10, 10))
    # The single active instance: a 100x20 rectangle rotated 99 degrees,
    # painted with label value 11.
    cv2.fillConvexPoly(
        label_image,
        np.expand_dims(cv2.boxPoints(((100, 200 + 200), (100, 20), 99)),
                       1).astype(int), (11, 11, 11))
    # cv2.fillConvexPoly(label_image, np.expand_dims(cv2.boxPoints(((200, 200 + 200), (100, 20), 165)), 1).astype(int), (12, 12, 12))

    # cv2.fillConvexPoly(label_image, np.expand_dims(np.array([[25, 90], [125, 90], [125, 110], [25, 110]]), 1), (1, 1, 1))
    # cv2.fillConvexPoly(label_image, np.expand_dims(np.array([[10, 400], [70, 400], [70, 420], [10, 420]]), 1), (3, 3, 3))
    # cv2.fillConvexPoly(label_image, np.expand_dims(np.array([[100, 100], [200, 110], [200, 140], [110, 130]]), 1), (4, 4, 4))
    # cv2.fillConvexPoly(label_image, np.expand_dims(np.array([[300, 200], [400, 210], [410, 330], [310, 330]]), 1), (5, 5, 5))

    # Colorize the instance mask for display.
    image = (label2rgb(label_image, bg_label=0) * 255).astype(np.uint8)

    # Test what happens if we rotate
    # image = np.rot90(image).copy()
    # label_image = np.rot90(label_image).copy()

    # Axis-aligned boxes derived from the instance mask.
    bboxes = instance_mask_to_bboxes(label_image)
    print(bboxes)

    # Every box gets foreground class index 0.
    labels = np.zeros(len(bboxes), dtype=np.intp)

    box_coder = FPNSSDBoxCoder()

    # Encode to SSD regression/classification targets, also returning the
    # anchors that were matched so they can be drawn below.
    loc_targets, cls_targets, anchors = box_coder.encode(
        torch.from_numpy(bboxes).float(),
        torch.from_numpy(labels),
        return_anchors=True)
    print(loc_targets.shape, cls_targets.shape)

    # One-hot with 2 classes hard-coded (background + one foreground).
    cls_targets_one_hot = np.eye(2)[cls_targets]
    print(cls_targets_one_hot.shape)

    # Decode straight back; should recover the encoded boxes.
    dec_boxes, dec_labels, dec_scores = box_coder.decode(
        loc_targets, torch.from_numpy(cls_targets_one_hot))
    print(dec_boxes)

    for bbox in dec_boxes.numpy():
        visualize_bbox(image, bbox, (255, 0, 255), thickness=3)

    for bbox in bboxes:
        visualize_bbox(image, bbox, (0, 255, 0), thickness=1)

    for bbox in anchors.numpy():
        visualize_bbox(image, bbox, (255, 255, 255), thickness=1)

    cv2.imshow('overlays', image)
    # cv2.imshow('anchors', anchors)
    cv2.waitKey(-1)
class FPNSSD512(nn.Module):
    """SSD512 object detector with a ResNet-50 FPN backbone.

    `forward` emits raw location and class predictions for every anchor
    on every FPN level; `self.box_coder` turns them into boxes.
    """

    # Anchors predicted per feature-map cell.
    num_anchors = 9

    def __init__(self, num_classes, pretrained=True, **kwargs):
        """
        Args:
            num_classes: foreground class count; one extra background
                ("dummy") class is appended internally.
            pretrained: when True, warm-start the FPN trunk from
                ImageNet ResNet-50 weights.
        """
        super(FPNSSD512, self).__init__()
        self.fpn = FPN50()
        self.num_classes = num_classes + 1  # Dummy class
        self.loc_head = self._make_head(self.num_anchors * 4)
        self.cls_head = self._make_head(self.num_anchors * self.num_classes)
        self.box_coder = FPNSSDBoxCoder()

        # strict=False: the ResNet-50 state dict covers only the shared
        # trunk; FPN-specific layers keep their fresh initialization.
        resnet_state = resnet50(pretrained=pretrained).state_dict()
        self.fpn.load_state_dict(resnet_state, strict=False)

    def forward(self, image):
        """Return (bboxes, labels): [N, A, 4] and [N, A, num_classes]."""
        loc_preds = []
        cls_preds = []
        for fm in self.fpn(image):
            loc_pred = self.loc_head(fm)
            cls_pred = self.cls_head(fm)
            # [N, 9*4, H, W] -> [N, H, W, 9*4] -> [N, H*W*9, 4]
            loc_preds.append(loc_pred.permute(0, 2, 3, 1).contiguous().view(
                image.size(0), -1, 4))
            # [N, 9*NC, H, W] -> [N, H, W, 9*NC] -> [N, H*W*9, NC]
            cls_preds.append(cls_pred.permute(0, 2, 3, 1).contiguous().view(
                image.size(0), -1, self.num_classes))

        bboxes = torch.cat(loc_preds, 1)
        labels = torch.cat(cls_preds, 1)

        return bboxes, labels

    def _make_head(self, out_planes):
        """Four 3x3 conv+ReLU blocks followed by a 3x3 projection."""
        layers = []
        for _ in range(4):
            layers.append(
                nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU(True))
        layers.append(
            nn.Conv2d(256, out_planes, kernel_size=3, stride=1, padding=1))
        return nn.Sequential(*layers)

    def predict(self, image):
        """Tile an arbitrarily sized image and decode merged detections.

        Args:
            image: HWC RGB image (numpy array, as consumed by
                albumentations' Normalize).

        Returns:
            Tuple of numpy arrays (boxes, labels, scores).
        """
        import albumentations as A
        self.eval()

        normalize = A.Normalize()
        image = normalize(image=image)['image']

        # 512px tiles with 50% overlap.
        slicer = ImageSlicer(image.shape, 512, 512 // 2)
        patches = [
            tensor_from_rgb_image(patch)
            for patch in slicer.split(image, borderType=cv2.BORDER_CONSTANT)
        ]
        # Per-tile (x, y, x, y) offsets used to merge tile detections
        # back into full-image coordinates.
        offsets = torch.tensor([[crop[0], crop[1], crop[0], crop[1]]
                                for crop in slicer.bbox_crops],
                               dtype=torch.float32)

        all_bboxes = []
        all_labels = []

        # Inference only — no autograd graph needed.
        with torch.no_grad():
            for patch, _ in DataLoader(list(zip(patches, offsets)),
                                       batch_size=8,
                                       pin_memory=True):
                patch = patch.to(self.fpn.conv1.weight.device)
                bboxes, labels = self(patch)

                all_bboxes.extend(bboxes.cpu())
                all_labels.extend(labels.cpu())

        boxes, labels, scores = self.box_coder.decode_multi(
            all_bboxes, all_labels, offsets)
        return to_numpy(boxes), to_numpy(labels), to_numpy(scores)
# Example #9 (extraction artifact; stray vote-count line removed)
class MobilenetSSD512(nn.Module):
    """SSD512 detector with a MobileNetV2 backbone and a light FPN top."""

    # Anchors predicted per feature-map cell.
    num_anchors = 9

    def __init__(self, num_classes, **kwargs):
        """
        Args:
            num_classes: foreground class count; one extra background
                ("dummy") class is appended internally.
        """
        super(MobilenetSSD512, self).__init__()
        encoder = MobileNetV2()

        # Adopt the eight MobileNetV2 stages as submodules of this model.
        self.layer0 = encoder.layer0
        self.layer1 = encoder.layer1
        self.layer2 = encoder.layer2
        self.layer3 = encoder.layer3
        self.layer4 = encoder.layer4
        self.layer5 = encoder.layer5
        self.layer6 = encoder.layer6
        self.layer7 = encoder.layer7

        # Strided 3x3 convs producing the extra small feature maps p6..p9.
        self.conv6 = nn.Conv2d(320, 64, kernel_size=3, stride=2, padding=1)
        self.conv7 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.conv8 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)
        self.conv9 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)

        # Top-down layers
        self.toplayer = nn.Conv2d(320, 64, kernel_size=1, stride=1, padding=0)

        # Lateral layers (1x1 projections to the common 64-channel width)
        self.latlayer1 = nn.Conv2d(96, 64, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0)

        # Smooth layers applied after each top-down merge
        self.smooth1 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.smooth2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)

        self.num_classes = num_classes + 1  # Dummy class
        self.loc_head = self._make_head(self.num_anchors * 4)
        self.cls_head = self._make_head(self.num_anchors * self.num_classes)
        self.box_coder = FPNSSDBoxCoder()

    def forward(self, image):
        """Return (bboxes, labels): [N, A, 4] and [N, A, num_classes]."""
        # Backbone feature extraction.
        c0 = self.layer0(image)
        c1 = self.layer1(c0)
        c2 = self.layer2(c1)
        c3 = self.layer3(c2)
        c4 = self.layer4(c3)
        c5 = self.layer5(c4)
        c6 = self.layer6(c5)
        c7 = self.layer7(c6)

        # Extra pyramid levels below the backbone's deepest map.
        p6 = self.conv6(c7)
        p7 = self.conv7(F.relu(p6))
        p8 = self.conv8(F.relu(p7))
        p9 = self.conv9(F.relu(p8))

        # Top-down pathway with lateral connections.
        p5 = self.toplayer(c7)
        p4 = self._upsample_add(p5, self.latlayer1(c5))
        p3 = self._upsample_add(p4, self.latlayer2(c3))
        p4 = self.smooth1(p4)
        p3 = self.smooth2(p3)

        features = [p3, p4, p5, p6, p7, p8, p9]

        loc_preds = []
        cls_preds = []

        for fm in features:
            loc_pred = self.loc_head(fm)
            cls_pred = self.cls_head(fm)
            # [N, 9*4, H, W] -> [N, H, W, 9*4] -> [N, H*W*9, 4]
            loc_preds.append(loc_pred.permute(0, 2, 3, 1).contiguous().view(
                image.size(0), -1, 4))
            # [N, 9*NC, H, W] -> [N, H, W, 9*NC] -> [N, H*W*9, NC]
            cls_preds.append(cls_pred.permute(0, 2, 3, 1).contiguous().view(
                image.size(0), -1, self.num_classes))

        bboxes = torch.cat(loc_preds, 1)
        labels = torch.cat(cls_preds, 1)

        return bboxes, labels

    def _upsample_add(self, x, y, scale_factor=2):
        '''Upsample `x` to `y`'s spatial size (nearest) and add them.

        Args:
          x: top feature map to be upsampled.
          y: lateral feature map.
          scale_factor: kept for backward compatibility; the target size
            is taken from `y` directly, so the value is ignored.

        Returns:
          `x` upsampled to `y`'s (H, W), plus `y`.

        Note in PyTorch, when the input size is odd, upsampling with
        `scale_factor=2, mode='nearest'` may not match the lateral map:
        e.g. input [N,_,15,15] -> conv [N,_,8,8] -> upsampled [N,_,16,16].
        Interpolating to `y.size()` exactly avoids that shape mismatch.
        '''
        _, _, H, W = y.size()
        # BUG FIX: was `scale_factor=2`, which ignored the computed H/W
        # and crashed on odd-sized inputs; resize directly to y's size.
        # Nearest interpolation to exactly 2x is identical to the old
        # behavior on even sizes.
        return F.interpolate(x, size=(H, W), mode='nearest') + y

    def _make_head(self, out_planes):
        """Four 3x3 conv+ReLU blocks followed by a 3x3 projection."""
        layers = []
        for _ in range(4):
            layers.append(nn.Conv2d(64, 64, kernel_size=3, stride=1,
                                    padding=1))
            layers.append(nn.ReLU(True))
        layers.append(
            nn.Conv2d(64, out_planes, kernel_size=3, stride=1, padding=1))
        return nn.Sequential(*layers)

    def predict(self, image):
        """Tile an arbitrarily sized image and decode merged detections.

        Args:
            image: HWC RGB image (numpy array, as consumed by
                albumentations' Normalize).

        Returns:
            Tuple of numpy arrays (boxes, labels, scores).
        """
        import albumentations as A
        self.eval()

        normalize = A.Normalize()
        image = normalize(image=image)['image']

        # 512px tiles with 50% overlap.
        slicer = ImageSlicer(image.shape, 512, 512 // 2)
        patches = [
            tensor_from_rgb_image(patch)
            for patch in slicer.split(image, borderType=cv2.BORDER_CONSTANT)
        ]
        # Per-tile (x, y, x, y) offsets used to merge tile detections
        # back into full-image coordinates.
        offsets = torch.tensor([[crop[0], crop[1], crop[0], crop[1]]
                                for crop in slicer.bbox_crops],
                               dtype=torch.float32)

        all_bboxes = []
        all_labels = []

        # Inference only — no autograd graph needed.
        with torch.no_grad():
            for patch, _ in DataLoader(list(zip(patches, offsets)),
                                       batch_size=8,
                                       pin_memory=True):
                patch = patch.to(self.conv6.weight.device)
                bboxes, labels = self(patch)

                all_bboxes.extend(bboxes.cpu())
                all_labels.extend(labels.cpu())

        boxes, labels, scores = self.box_coder.decode_multi(
            all_bboxes, all_labels, offsets)
        return to_numpy(boxes), to_numpy(labels), to_numpy(scores)
 def __init__(self, threshold=0.5):
     # Per-image scores accumulated across update() calls.
     self.scores_per_image = []
     # NOTE(review): stored but not read in the visible code; presumably
     # an IoU cutoff consumed elsewhere — verify.
     self.threshold = threshold
     # Coder used to turn raw SSD outputs back into boxes.
     self.box_coder = FPNSSDBoxCoder()