def predict(self, image, prob_threshold=0.5):
    """Run detection on one image and return boxes in original-image coordinates.

    Args:
        image: ``np.ndarray``. The code reads ``image.shape[1:]`` as the
            image size and reshapes to ``(1, shape[0], shape[1], shape[2])``,
            so a 3-D array is expected (presumably ``(C=3, H, W)``). The
            batch axis is added here; do not pass a 4-D array.
        prob_threshold: Forwarded unchanged to ``self.head.predict``.

    Returns:
        Tuple ``(bbox_out, class_out, prob_out)`` as produced by
        ``self.head.predict``, with ``bbox_out`` passed through
        ``resize_bbox`` from the adjusted image size back to the original
        image size.

    Raises:
        AssertionError: If ``image`` is not a ``np.ndarray``.
        Exception: If the model is in training mode.
    """
    # ---------- debug
    assert isinstance(image, np.ndarray)
    # ---------- debug

    if self.training:
        raise Exception(
            "Do not call predict in training mode, you should call .eval() to set the model in eval mode!")

    # Remember the input size so the output boxes can be mapped back to it.
    original_image_size = image.shape[1:]
    image = adjust_image_size(image)
    new_image_size = image.shape[1:]
    image = image_normalize(image)

    # Add the leading batch axis expected by the feature extractor.
    image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])
    image = Variable(torch.FloatTensor(image))
    # NOTE(review): the input goes to CUDA whenever CUDA is available,
    # regardless of where the model parameters live -- confirm the model is
    # always moved to the GPU in that case.
    if torch.cuda.is_available():
        image = image.cuda()

    # NOTE(review): autograd is not disabled here; consider wrapping the
    # forward pass in ``torch.no_grad()`` if the installed torch supports it.
    features = self.feature_extractor(image)
    image_size = image.shape[2:]

    # Region proposals from the RPN.
    delta, score, anchor = self.rpn.forward(features, image_size)
    roi = self.rpn.predict(delta, score, anchor, image_size)

    # Per-class refinement and scoring of the proposals by the head.
    delta_per_class, score = self.head.forward(features, roi, image_size)
    bbox_out, class_out, prob_out = self.head.predict(
        roi, delta_per_class, score, image_size, prob_threshold=prob_threshold)

    # Boxes are relative to the adjusted image; map them back.
    bbox_out = resize_bbox(bbox_out, new_image_size, original_image_size)
    return bbox_out, class_out, prob_out
def loss(self, image, gt_bbox, gt_bbox_label):
    """Compute the combined RPN + head training loss for a single image.

    NOTE(review): a second definition of ``loss`` with the same signature
    follows this one in the file; if both live in the same class the later
    one overrides this one -- confirm which is intended to be kept.

    Args:
        image: 3-D ``np.ndarray`` (asserted below); ``image.shape[1:]`` is
            used as the image size, so presumably ``(C, H, W)``.
        gt_bbox: ``np.ndarray`` of ground-truth boxes; its first dimension
            must equal that of ``gt_bbox_label``.
        gt_bbox_label: ``np.ndarray`` of ground-truth labels, one per box.

    Returns:
        ``rpn_loss + head_loss`` as returned by ``self.rpn.loss`` and
        ``self.head.loss``.

    Raises:
        Exception: If the model is in eval mode.
        AssertionError: If an input fails the type/shape checks below.
    """
    if not self.training:
        raise Exception(
            "Do not call loss in eval mode, you should call .train() to set the model in train model!"
        )

    # -------- debug
    assert isinstance(image, np.ndarray)
    assert isinstance(gt_bbox, np.ndarray)
    assert isinstance(gt_bbox_label, np.ndarray)
    assert len(image.shape) == 3
    assert gt_bbox.shape[0] == gt_bbox_label.shape[0]
    # -------- debug

    original_image_size = image.shape[1:]
    # Presumably a random horizontal flip applied to image and boxes together.
    image, gt_bbox = random_flip(image, gt_bbox, horizontal_random=True)
    image = adjust_image_size(image)  # resize the image
    new_image_size = image.shape[1:]
    # Keep the ground-truth boxes aligned with the resized image.
    gt_bbox = resize_bbox(gt_bbox, original_image_size, new_image_size)
    image = image_normalize(image)  # normalize the image

    # Add the leading batch axis expected by the feature extractor.
    image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])
    image = Variable(torch.FloatTensor(image))
    if torch.cuda.is_available():
        image = image.cuda()

    # Per the original author's note this runs a VGG16 backbone on the
    # input image -- not verifiable from this block.
    features = self.feature_extractor(image)

    # rpn loss
    delta, score, anchor = self.rpn.forward(features, new_image_size)
    rpn_loss = self.rpn.loss(delta, score, anchor, gt_bbox, new_image_size)

    # head loss:
    roi = self.rpn.predict(delta, score, anchor, new_image_size)
    # make_proposal_target is used only for training: it yields the sampled
    # RoIs together with their target deltas and class labels.
    (sample_roi,
     target_delta_for_sample_roi,
     bbox_bg_label_for_sample_roi) = self.proposal_target_creator.make_proposal_target(
        roi, gt_bbox, gt_bbox_label)

    delta_per_class, score = self.head.forward(features, sample_roi, new_image_size)
    head_loss = self.head.loss(score, delta_per_class,
                               target_delta_for_sample_roi,
                               bbox_bg_label_for_sample_roi)

    return rpn_loss + head_loss
def loss(self, image, gt_bbox, gt_bbox_label):
    """Compute the combined RPN + head training loss for a single image.

    NOTE(review): an earlier definition of ``loss`` with the same signature
    precedes this one in the file; if both live in the same class this later
    definition is the one that takes effect.

    Args:
        image: ``np.ndarray`` of shape ``(C=3, H, W)``. The original note
            says pixels should be in range 0~1 and normalized, yet
            ``image_normalize`` is also applied below -- TODO confirm what
            the caller is expected to have done already.
        gt_bbox: ``np.ndarray`` of shape ``(N2, 4)``.
        gt_bbox_label: ``np.ndarray`` of shape ``(N2,)``.

    Returns:
        ``rpn_loss + head_loss`` as returned by ``self.rpn.loss`` and
        ``self.head.loss``.

    Raises:
        Exception: If the model is in eval mode.
        AssertionError: If an input fails the type/shape checks below.
    """
    if not self.training:
        raise Exception("Do not call loss in eval mode, you should call .train() to set the model in train model!")

    # -------- debug
    assert isinstance(image, np.ndarray)
    assert isinstance(gt_bbox, np.ndarray)
    assert isinstance(gt_bbox_label, np.ndarray)
    assert len(image.shape) == 3
    assert gt_bbox.shape[0] == gt_bbox_label.shape[0]
    # -------- debug

    original_image_size = image.shape[1:]
    # Presumably a random horizontal flip applied to image and boxes together.
    image, gt_bbox = random_flip(image, gt_bbox, horizontal_random=True)
    image = adjust_image_size(image)
    new_image_size = image.shape[1:]
    # Keep the ground-truth boxes aligned with the resized image.
    gt_bbox = resize_bbox(gt_bbox, original_image_size, new_image_size)
    image = image_normalize(image)

    # Add the leading batch axis expected by the feature extractor.
    image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])
    image = Variable(torch.FloatTensor(image))
    if torch.cuda.is_available():
        image = image.cuda()

    features = self.feature_extractor(image)

    # rpn loss
    delta, score, anchor = self.rpn.forward(features, new_image_size)
    rpn_loss = self.rpn.loss(delta, score, anchor, gt_bbox, new_image_size)

    # head loss: sample RoIs from the proposals and build their targets.
    roi = self.rpn.predict(delta, score, anchor, new_image_size)
    (sample_roi,
     target_delta_for_sample_roi,
     bbox_bg_label_for_sample_roi) = self.proposal_target_creator.make_proposal_target(
        roi, gt_bbox, gt_bbox_label)

    delta_per_class, score = self.head.forward(features, sample_roi, new_image_size)
    head_loss = self.head.loss(score, delta_per_class,
                               target_delta_for_sample_roi,
                               bbox_bg_label_for_sample_roi)

    return rpn_loss + head_loss