Ejemplo n.º 1
0
    def predict(self, image, prob_threshold=0.5):
        """Run detection on one image and return boxes, classes and scores.

        Args:
            image: ``np.ndarray`` handled as a single *unbatched* image laid
                out as (C, H, W): ``image.shape[1:]`` is taken as the spatial
                size and a leading batch axis of length 1 is added here.
            prob_threshold: forwarded unchanged to ``self.head.predict`` as
                its ``prob_threshold`` keyword.

        Returns:
            The tuple ``(bbox_out, class_out, prob_out)`` produced by
            ``self.head.predict``. ``bbox_out`` is additionally passed through
            ``resize_bbox`` from the adjusted image size to the original one
            (presumably mapping boxes back to input resolution -- confirm
            against ``resize_bbox``).

        Raises:
            AssertionError: if ``image`` is not a numpy array (debug check).
            Exception: if the model is still in training mode.
        """
        # ---------- debug
        assert isinstance(image, np.ndarray)
        # ---------- debug
        if self.training:
            raise Exception(
                "Do not call predict in training mode, you should call .eval() to set the model in eval mode!")

        # Remember the caller's resolution so the boxes can be converted back.
        original_image_size = image.shape[1:]
        image = adjust_image_size(image)
        new_image_size = image.shape[1:]
        image = image_normalize(image)
        # Add the batch axis: (C, H, W) -> (1, C, H, W).
        image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])

        # Inference never back-propagates; disabling autograd avoids building
        # and retaining the graph for the whole forward pass.
        with torch.no_grad():
            image = Variable(torch.FloatTensor(image))
            if torch.cuda.is_available():
                # NOTE(review): assumes the model lives on the GPU whenever
                # CUDA is available -- confirm against the training script.
                image = image.cuda()

            features = self.feature_extractor(image)
            image_size = image.shape[2:]

            delta, score, anchor = self.rpn.forward(features, image_size)
            roi = self.rpn.predict(delta, score, anchor, image_size)

            delta_per_class, score = self.head.forward(features, roi, image_size)
            bbox_out, class_out, prob_out = self.head.predict(
                roi, delta_per_class, score, image_size,
                prob_threshold=prob_threshold)

        bbox_out = resize_bbox(bbox_out, new_image_size, original_image_size)

        return bbox_out, class_out, prob_out
Ejemplo n.º 2
0
    def loss(self, image, gt_bbox, gt_bbox_label):
        """Return the summed RPN and head training loss for one image.

        image: np.ndarray with exactly three dimensions; ``shape[1:]`` is
            used as the image size.
        gt_bbox: np.ndarray of ground-truth boxes, one entry per object.
        gt_bbox_label: np.ndarray whose first dimension matches ``gt_bbox``.

        Raises ``Exception`` when the model is in eval mode and
        ``AssertionError`` when one of the debug input checks fails.
        """
        if not self.training:
            raise Exception(
                "Do not call loss in eval mode, you should call .train() to set the model in train model!"
            )

        # Debug-only sanity checks on the inputs.
        for array in (image, gt_bbox, gt_bbox_label):
            assert isinstance(array, np.ndarray)
        assert image.ndim == 3
        assert gt_bbox.shape[0] == gt_bbox_label.shape[0]

        # Size is recorded before the flip/resize so the boxes can follow the
        # image through the same size change.
        size_before = image.shape[1:]
        image, gt_bbox = random_flip(image, gt_bbox, horizontal_random=True)

        image = adjust_image_size(image)
        size_after = image.shape[1:]
        gt_bbox = resize_bbox(gt_bbox, size_before, size_after)

        image = image_normalize(image)
        # Prepend a batch axis of length 1.
        batch_shape = (1, image.shape[0], image.shape[1], image.shape[2])
        batch = Variable(torch.FloatTensor(image.reshape(batch_shape)))
        if torch.cuda.is_available():
            batch = batch.cuda()

        # Backbone forward pass (the original comment says this runs VGG16).
        features = self.feature_extractor(batch)

        # Region proposal network: raw outputs and their loss.
        rpn_delta, rpn_score, anchor = self.rpn.forward(features, size_after)
        rpn_loss = self.rpn.loss(rpn_delta, rpn_score, anchor, gt_bbox,
                                 size_after)

        # Proposals are sampled and paired with training targets (target
        # delta and class label) before being fed to the head.
        roi = self.rpn.predict(rpn_delta, rpn_score, anchor, size_after)
        targets = self.proposal_target_creator.make_proposal_target(
            roi, gt_bbox, gt_bbox_label)
        sample_roi, target_delta, target_label = targets

        head_delta, head_score = self.head.forward(features, sample_roi,
                                                   size_after)
        head_loss = self.head.loss(head_score, head_delta, target_delta,
                                   target_label)

        return rpn_loss + head_loss
Ejemplo n.º 3
0
    def loss(self, image, gt_bbox, gt_bbox_label):
        """Compute the total training loss (RPN loss + head loss) for one image.

        Args:
            image: np.ndarray documented as (C=3, H, W). The earlier
                documentation asks for pixels in the range 0~1; note that
                ``image_normalize`` is also applied here -- TODO confirm the
                expected input range.
            gt_bbox: np.ndarray documented as (N2, 4).
            gt_bbox_label: np.ndarray documented as (N2,).

        Returns:
            ``rpn_loss + head_loss``.

        Raises:
            Exception: if the model is in eval mode.
            AssertionError: if a debug input check fails.
        """
        if not self.training:
            raise Exception("Do not call loss in eval mode, you should call .train() to set the model in train model!")

        # -------- debug checks
        all_numpy = (isinstance(image, np.ndarray)
                     and isinstance(gt_bbox, np.ndarray)
                     and isinstance(gt_bbox_label, np.ndarray))
        assert all_numpy
        assert len(image.shape) == 3
        assert gt_bbox.shape[0] == gt_bbox_label.shape[0]

        # Augment and resize; the boxes are rescaled with the same size pair.
        orig_hw = image.shape[1:]
        image, gt_bbox = random_flip(image, gt_bbox, horizontal_random=True)
        image = adjust_image_size(image)
        resized_hw = image.shape[1:]
        gt_bbox = resize_bbox(gt_bbox, orig_hw, resized_hw)

        # Normalize, add a batch axis of 1, and wrap as a float tensor.
        image = image_normalize(image)
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        tensor = torch.FloatTensor(image)
        image = Variable(tensor)
        image = image.cuda() if torch.cuda.is_available() else image

        features = self.feature_extractor(image)

        # RPN branch.
        delta, score, anchor = self.rpn.forward(features, resized_hw)
        rpn_loss = self.rpn.loss(delta, score, anchor, gt_bbox, resized_hw)

        # Head branch: proposals from the RPN are turned into sampled RoIs
        # with their regression targets and labels.
        roi = self.rpn.predict(delta, score, anchor, resized_hw)
        (sample_roi,
         target_delta_for_sample_roi,
         bbox_bg_label_for_sample_roi) = self.proposal_target_creator.make_proposal_target(
            roi, gt_bbox, gt_bbox_label)

        delta_per_class, score = self.head.forward(features, sample_roi, resized_hw)
        head_loss = self.head.loss(
            score,
            delta_per_class,
            target_delta_for_sample_roi,
            bbox_bg_label_for_sample_roi,
        )

        total_loss = rpn_loss + head_loss
        return total_loss