def __init__(self, faster_rcnn, rpn_sigma=3.0, head_sigma=1.0):
     """Bundle a Faster R-CNN model with the helpers used to train it.

     Args:
         faster_rcnn: Model object; it must expose ``get_optimizer()``.
         rpn_sigma: Value stored as ``self.rpn_sigma``.
         head_sigma: Value stored as ``self.roi_sigma`` (note the attribute
             name differs from the parameter name).
     """
     super().__init__()
     model = faster_rcnn
     self.faster_rcnn = model
     # Both sigmas are only stored here; nothing in this method consumes them.
     self.rpn_sigma, self.roi_sigma = rpn_sigma, head_sigma
     # Target creators are built with their default settings.
     self.anchor_target_creator = AnchorTargetCreator()
     self.proposal_target_creator = ProposalTargetCreator()
     # The optimizer is supplied by the wrapped model itself.
     self.optimizer = model.get_optimizer()
Esempio n. 2
0
 def __init__(self, feature_extractor, rpn, head):
     """Keep references to the three stages and build a proposal sampler.

     Args:
         feature_extractor: Stored as ``self.feature_extractor``.
         rpn: Stored as ``self.rpn``.
         head: Stored as ``self.head``.
     """
     super(_Faster_RCNN_Maker, self).__init__()
     # Assigned left to right, i.e. extractor, then RPN, then head.
     self.feature_extractor, self.rpn, self.head = (
         feature_extractor, rpn, head)
     self.proposal_target_creator = ProposalTargetCreator()
Esempio n. 3
0
class _Faster_RCNN_Maker(nn.Module):
    """Faster R-CNN pipeline assembled from a feature extractor, RPN and head.

    The module is not meant to be driven through ``forward``: call
    :meth:`loss` while in training mode and :meth:`predict` while in eval
    mode.

    Args:
        feature_extractor: Callable applied to the batched image tensor; its
            output is passed to both ``rpn`` and ``head``.
        rpn: Object providing ``forward``, ``loss`` and ``predict``.
        head: Object providing ``forward``, ``loss`` and ``predict``.
    """

    def __init__(self, feature_extractor, rpn, head):
        super(_Faster_RCNN_Maker, self).__init__()
        self.feature_extractor = feature_extractor
        self.rpn = rpn
        self.head = head
        self.proposal_target_creator = ProposalTargetCreator()

    def forward(self):
        """Always raise; use :meth:`loss` or :meth:`predict` instead."""
        raise NotImplementedError(
            "Do not call forward directly! Instead, calling .loss in traininig phase and .predict in inference phase!"
        )

    @staticmethod
    def _to_network_input(image):
        """Normalise a 3-D image array and wrap it as a one-image batch.

        The array is passed through ``image_normalize``, given a leading
        batch axis of size 1, converted to a float tensor and moved to the
        GPU when CUDA is available.
        """
        image = image_normalize(image)
        image = image.reshape(1, image.shape[0], image.shape[1],
                              image.shape[2])
        image = Variable(torch.FloatTensor(image))
        if torch.cuda.is_available():
            image = image.cuda()
        return image

    def loss(self, image, gt_bbox, gt_bbox_label):
        """Compute the combined RPN + head training loss for one image.

        Args:
            image: 3-D ``np.ndarray``; ``image.shape[1:]`` is used as the
                image size (assumed (C, H, W) layout -- TODO confirm).
            gt_bbox: ``np.ndarray`` of ground-truth boxes, one row per object.
            gt_bbox_label: ``np.ndarray`` of labels with the same first
                dimension as ``gt_bbox``.

        Returns:
            ``rpn_loss + head_loss`` as produced by ``self.rpn.loss`` and
            ``self.head.loss``.

        Raises:
            RuntimeError: If the module is in eval mode.
        """
        if not self.training:
            raise RuntimeError(
                "Do not call loss in eval mode, you should call .train() to set the model in train model!"
            )
        # Debug-time sanity checks on the inputs.
        assert isinstance(image, np.ndarray)
        assert isinstance(gt_bbox, np.ndarray)
        assert isinstance(gt_bbox_label, np.ndarray)
        assert len(image.shape) == 3
        assert gt_bbox.shape[0] == gt_bbox_label.shape[0]

        original_image_size = image.shape[1:]
        image, gt_bbox = random_flip(image, gt_bbox, horizontal_random=True)

        # Resize the image, then rescale the boxes to the new size.
        image = adjust_image_size(image)
        new_image_size = image.shape[1:]
        gt_bbox = resize_bbox(gt_bbox, original_image_size, new_image_size)

        image = self._to_network_input(image)
        features = self.feature_extractor(image)

        # RPN loss.
        delta, score, anchor = self.rpn.forward(features, new_image_size)
        rpn_loss = self.rpn.loss(delta, score, anchor, gt_bbox, new_image_size)

        # Head loss: sample RoIs from the RPN proposals together with their
        # regression targets and labels, then score only those samples.
        roi = self.rpn.predict(delta, score, anchor, new_image_size)
        (sample_roi, target_delta_for_sample_roi,
         bbox_bg_label_for_sample_roi) = (
             self.proposal_target_creator.make_proposal_target(
                 roi, gt_bbox, gt_bbox_label))

        delta_per_class, score = self.head.forward(features, sample_roi,
                                                   new_image_size)
        head_loss = self.head.loss(score, delta_per_class,
                                   target_delta_for_sample_roi,
                                   bbox_bg_label_for_sample_roi)

        return rpn_loss + head_loss

    def predict(self, image, prob_threshold=0.5):
        """Detect objects in one image.

        Args:
            image: 3-D ``np.ndarray``; ``image.shape[1:]`` is used as the
                image size (assumed (C, H, W) layout -- TODO confirm).
            prob_threshold: Forwarded to ``self.head.predict``.

        Returns:
            Tuple ``(bbox_out, class_out, prob_out)`` from
            ``self.head.predict``, with ``bbox_out`` rescaled back to the
            original image size.

        Raises:
            RuntimeError: If the module is in training mode.
        """
        assert isinstance(image, np.ndarray)
        if self.training:
            raise RuntimeError(
                "Do not call predict in training mode, you should call .eval() to set the model in eval mode!"
            )

        original_image_size = image.shape[1:]
        image = adjust_image_size(image)
        new_image_size = image.shape[1:]
        image = self._to_network_input(image)

        features = self.feature_extractor(image)
        # Spatial size taken from the batched tensor (axes 2 and 3).
        image_size = image.shape[2:]

        delta, score, anchor = self.rpn.forward(features, image_size)
        roi = self.rpn.predict(delta, score, anchor, image_size)

        delta_per_class, score = self.head.forward(features, roi, image_size)
        bbox_out, class_out, prob_out = self.head.predict(
            roi,
            delta_per_class,
            score,
            image_size,
            prob_threshold=prob_threshold)

        # Map boxes from the resized image back to the caller's image size.
        bbox_out = resize_bbox(bbox_out, new_image_size, original_image_size)

        return bbox_out, class_out, prob_out

    def get_optimizer(self, is_adam=False):
        """Build, store and return the optimizer for all trainable parameters.

        Each trainable parameter gets its own parameter group: parameters
        whose name contains ``'bias'`` use twice the base learning rate and
        no weight decay, all others use the base learning rate (0.001) with
        weight decay 0.0005.

        Args:
            is_adam: When true use ``torch.optim.Adam``; otherwise use
                ``torch.optim.SGD`` with momentum 0.9.

        Returns:
            The optimizer, which is also stored as ``self.optimizer``.
        """
        lr = 0.001
        params = []
        for name, param in self.named_parameters():
            if not param.requires_grad:
                continue
            if 'bias' in name:
                params.append({
                    'params': [param],
                    'lr': lr * 2,
                    'weight_decay': 0
                })
            else:
                params.append({
                    'params': [param],
                    'lr': lr,
                    'weight_decay': 0.0005
                })
        # Honour the caller's choice of optimizer.
        if is_adam:
            self.optimizer = torch.optim.Adam(params)
        else:
            self.optimizer = torch.optim.SGD(params, momentum=0.9)
        return self.optimizer
Esempio n. 4
0
class Faster_RCNN(nn.Module):
    """Faster R-CNN pipeline assembled from a feature extractor, RPN and head.

    ``forward`` is intentionally unusable: call :meth:`loss` while in
    training mode and :meth:`predict` while in eval mode.

    Args:
        feature_extractor: Callable applied to the batched image tensor; its
            output is passed to both ``rpn`` and ``head``.
        rpn: Object providing ``forward``, ``loss`` and ``predict``.
        head: Object providing ``forward``, ``loss`` and ``predict``.
    """

    def __init__(self, feature_extractor, rpn, head):
        super(Faster_RCNN, self).__init__()
        self.feature_extractor = feature_extractor
        self.rpn = rpn
        self.head = head
        self.proposal_target_creator = ProposalTargetCreator()

    def forward(self):
        """Always raise; use :meth:`loss` or :meth:`predict` instead."""
        raise NotImplementedError("No Forward!")

    @staticmethod
    def _to_network_input(image):
        """Normalise a 3-D image array and wrap it as a one-image batch.

        The array is passed through ``image_normalize``, reshaped to
        (batch_size=1, channel, height, width), converted to a float tensor
        and moved to the GPU when CUDA is available.
        """
        image = image_normalize(image)
        image = image.reshape(1, image.shape[0], image.shape[1],
                              image.shape[2])
        image = Variable(torch.FloatTensor(image))
        if torch.cuda.is_available():
            image = image.cuda()
        return image

    def loss(self, image, gt_bbox, gt_bbox_label):
        """Compute the combined RPN + head training loss for one image.

        Args:
            image: (C=3, H, W) array; the original author's note asks for
                pixel values in the range 0~1.
            gt_bbox: (N2, 4) array of ground-truth boxes.
            gt_bbox_label: (N2,) array of ground-truth labels.

        Returns:
            ``rpn_loss + head_loss`` as produced by ``self.rpn.loss`` and
            ``self.head.loss``.

        Raises:
            RuntimeError: If the module is in eval mode.
        """
        if not self.training:
            raise RuntimeError("Only in train mode!")

        original_image_size = image.shape[1:]  # height and width
        image, gt_bbox = image_flip(image, gt_bbox, h_ran=True)

        # Resize the image, then rescale the boxes to the new size.
        image = image_size_transform(image)
        new_image_size = image.shape[1:]  # new height and width
        gt_bbox = bbox_resize(gt_bbox, original_image_size, new_image_size)

        image = self._to_network_input(image)
        features = self.feature_extractor(image)

        # RPN loss.
        delta, score, anchor = self.rpn.forward(features, new_image_size)
        rpn_loss = self.rpn.loss(delta, score, anchor, gt_bbox,
                                 new_image_size)

        # Head loss: sample RoIs from the RPN proposals together with their
        # regression targets and labels, then score only those samples.
        roi = self.rpn.predict(delta, score, anchor, new_image_size)
        (sample_roi, target_delta_for_sample_roi,
         bbox_bg_label_for_sample_roi) = (
             self.proposal_target_creator.make_proposal_target(
                 roi, gt_bbox, gt_bbox_label))

        delta_per_class, score = self.head.forward(features, sample_roi,
                                                   new_image_size)
        head_loss = self.head.loss(score, delta_per_class,
                                   target_delta_for_sample_roi,
                                   bbox_bg_label_for_sample_roi)

        return rpn_loss + head_loss

    def predict(self, image, prob_threshold=0.5):
        """Detect objects in one image.

        Args:
            image: 3-D array; ``image.shape[1:]`` is used as the image size.
            prob_threshold: Forwarded to ``self.head.predict``.

        Returns:
            Tuple ``(bbox_out, class_out, prob_out)`` from
            ``self.head.predict``, with ``bbox_out`` rescaled back to the
            original image size.

        Raises:
            RuntimeError: If the module is in training mode.
        """
        if self.training:
            raise RuntimeError("Only in eval mode!")

        original_image_size = image.shape[1:]
        image = image_size_transform(image)
        new_image_size = image.shape[1:]
        image = self._to_network_input(image)

        features = self.feature_extractor(image)
        # Spatial size taken from the batched tensor (axes 2 and 3).
        image_size = image.shape[2:]

        delta, score, anchor = self.rpn.forward(features, image_size)
        roi = self.rpn.predict(delta, score, anchor, image_size)

        delta_per_class, score = self.head.forward(features, roi, image_size)
        bbox_out, class_out, prob_out = self.head.predict(
            roi,
            delta_per_class,
            score,
            image_size,
            prob_threshold=prob_threshold)

        # Map boxes from the resized image back to the caller's image size.
        bbox_out = bbox_resize(bbox_out, new_image_size, original_image_size)

        return bbox_out, class_out, prob_out

    def get_optimizer(self, is_adam=False):
        """Build, store and return the optimizer for all trainable parameters.

        Each trainable parameter gets its own parameter group: parameters
        whose name contains ``'bias'`` use twice the base learning rate and
        no weight decay, all others use the base learning rate (0.001) with
        weight decay 0.0005.

        Args:
            is_adam: When true use ``torch.optim.Adam``; otherwise use
                ``torch.optim.SGD`` with momentum 0.9.

        Returns:
            The optimizer, which is also stored as ``self.optimizer``.
        """
        lr = 0.001
        params = []
        for name, param in self.named_parameters():
            if not param.requires_grad:
                continue
            if 'bias' in name:
                params.append({
                    'params': [param],
                    'lr': lr * 2,
                    'weight_decay': 0
                })
            else:
                params.append({
                    'params': [param],
                    'lr': lr,
                    'weight_decay': 0.0005
                })
        # Honour the caller's choice of optimizer.
        if is_adam:
            self.optimizer = torch.optim.Adam(params)
        else:
            self.optimizer = torch.optim.SGD(params, momentum=0.9)
        return self.optimizer
Esempio n. 5
0

if __name__ == '__main__':
    # Manual smoke test for the detection head: feed it random features and
    # random boxes, run one loss/backward pass, then one plain forward pass.
    from model.utils.proposal_target_creator import ProposalTargetCreator
    # NOTE(review): 20 presumably is the number of classes (labels below are
    # drawn from 0..19) and 7 the RoI pooling size -- confirm against
    # get_vgg16_extractor_and_head. `extractor` and `output_feature_channel`
    # are not used in the visible part of this script.
    extractor, head, output_feature_channel = get_vgg16_extractor_and_head(
        20, 7)
    # Random stand-in for a feature map: batch of 1, 512 channels, 50x50.
    features = Variable(torch.randn(1, 512, 50, 50))
    if torch.cuda.is_available():
        extractor, head, features = extractor.cuda(), head.cuda(
        ), features.cuda()

    # Random boxes: columns 0-1 fall in [0, 240) and columns 2-3 in
    # [240, 480), so the last two coordinates always exceed the first two.
    rois = (np.random.rand(2000, 4) + [0, 0, 1, 1]) * 240
    gt_bbox = (np.random.rand(10, 4) + [0, 0, 1, 1]) * 240
    # One integer label in [0, 20) per ground-truth box.
    gt_bbox_label = np.random.randint(0, 20, size=10)

    # Sample training RoIs with their regression targets and labels.
    proposal_target_creator = ProposalTargetCreator()
    sample_roi, target_delta_for_sample_roi, bbox_bg_label_for_sample_roi = proposal_target_creator.make_proposal_target(
        rois, gt_bbox, gt_bbox_label)

    # Training-style pass: forward on the sampled RoIs, loss, backward.
    delta_per_class, score = head.forward(features,
                                          sample_roi,
                                          image_size=(500, 500))
    loss = head.loss(score, delta_per_class, target_delta_for_sample_roi,
                     bbox_bg_label_for_sample_roi)
    print(loss)
    loss.backward()

    # Inference-style pass: forward on 300 fresh random RoIs, no sampling.
    rois = (np.random.rand(300, 4) + [0, 0, 1, 1]) * 240
    delta_per_class, score = head.forward(features,
                                          rois,
                                          image_size=(500, 500))