def __call__(self, img, box=None, label=None, mask=None, **kwargs):
    '''
    img: a PIL image
    box: bounding boxes, a tensor of dimensions (#objects, 4), in xywh order
    label: labels of the objects, a tensor of dimensions (#objects)
    mask: optional segmentation mask of the image
    Out: cropped image, new boxes, new labels, new mask
    '''
    image = TF.to_tensor(img)
    masks = TF.to_tensor(mask) if mask is not None else None

    original_h = image.size(1)
    original_w = image.size(2)

    while True:
        # Randomly pick a minimum-overlap mode; None means "keep the original"
        mode = random.choice(self.ratios)

        if mode is None:
            return {'img': img, 'box': box, 'label': label, 'mask': mask}

        if box is not None:
            boxes = change_box_order(box, 'xywh2xyxy')
            boxes = torch.FloatTensor(boxes)
            labels = torch.LongTensor(label)
        else:
            boxes = None
            labels = None

        new_image = image
        new_boxes = boxes
        new_labels = labels
        new_masks = masks

        for _ in range(50):
            # Crop dimensions: [0.3, 1] of the original dimensions
            new_h = random.uniform(0.3 * original_h, original_h)
            new_w = random.uniform(0.3 * original_w, original_w)

            # Aspect-ratio constraint between 0.5 and 2
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            # Crop coordinates
            left = random.uniform(0, original_w - new_w)
            right = left + new_w
            top = random.uniform(0, original_h - new_h)
            bottom = top + new_h
            crop = torch.FloatTensor(
                [int(left), int(top), int(right), int(bottom)])

            # IoU between the crop and the bounding boxes
            if boxes is not None:
                overlap = find_jaccard_overlap(crop.unsqueeze(0),
                                               boxes)  # (1, #objects)
                overlap = overlap.squeeze(0)

                # If no bounding box has an IoU above the minimum, try again
                if overlap.max().item() < mode:
                    continue

            # Crop the image (and the mask, if present)
            new_image = image[:, int(top):int(bottom),
                              int(left):int(right)]  # (3, new_h, new_w)
            new_masks = masks[:, int(top):int(bottom), int(left):int(
                right)] if masks is not None else None

            if boxes is not None:
                # Centers of the bounding boxes
                center_bb = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # Keep only the boxes whose center lies inside the crop
                center_in_crop = (center_bb[:, 0] > left) & \
                                 (center_bb[:, 0] < right) & \
                                 (center_bb[:, 1] > top) & \
                                 (center_bb[:, 1] < bottom)  # (#objects)

                if not center_in_crop.any():
                    continue

                # Take the matching bounding boxes and labels
                new_boxes = boxes[center_in_crop, :]
                new_labels = labels[center_in_crop]

                # Clip the boxes to the crop, then shift to crop coordinates
                new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2])
                new_boxes[:, :2] -= crop[:2]
                new_boxes[:, 2:] = torch.min(new_boxes[:, 2:], crop[2:])
                new_boxes[:, 2:] -= crop[:2]

                new_boxes = change_box_order(new_boxes, 'xyxy2xywh')
                new_boxes = new_boxes.numpy()
                new_labels = new_labels.numpy()
            else:
                new_boxes = None

            new_masks = TF.to_pil_image(
                new_masks) if new_masks is not None else None

            return {'img': TF.to_pil_image(new_image),
                    'box': new_boxes,
                    'label': new_labels,
                    'mask': new_masks}
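# ---------------------------------------------------------------------------
# Sketch (not from the original file): a minimal, self-contained version of
# the pairwise IoU that `find_jaccard_overlap` above is assumed to compute on
# xyxy boxes. Handy for checking the crop-vs-box overlap gating by hand.
import torch

def iou_xyxy(set_1, set_2):
    """Pairwise IoU between two sets of xyxy boxes: (n1, 4), (n2, 4) -> (n1, n2)."""
    # Intersection corners, broadcast to (n1, n2, 2)
    lower = torch.max(set_1[:, None, :2], set_2[None, :, :2])
    upper = torch.min(set_1[:, None, 2:], set_2[None, :, 2:])
    inter = (upper - lower).clamp(min=0)
    inter_area = inter[..., 0] * inter[..., 1]  # (n1, n2)
    area_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1])
    area_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1])
    union = area_1[:, None] + area_2[None, :] - inter_area
    return inter_area / union

# Example: a crop covering the left half of a unit image vs. two boxes
crop = torch.FloatTensor([[0.0, 0.0, 0.5, 1.0]])
boxes = torch.FloatTensor([[0.1, 0.1, 0.4, 0.4],    # fully inside the crop
                           [0.4, 0.4, 0.9, 0.9]])   # straddles the crop edge
print(iou_xyxy(crop, boxes))  # (1, 2) IoU matrix fed to the min-overlap check
# ---------------------------------------------------------------------------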
def forward(self, predicted_locs, predicted_scores, boxes, labels):
    batch_size = predicted_locs.size(0)
    n_priors = self.priors_cxcy.size(0)
    n_classes = predicted_scores.size(2)

    true_locs = torch.zeros((batch_size, n_priors, 4),
                            dtype=torch.float).to(device)  # [N, n_priors, 4]
    true_classes = torch.zeros((batch_size, n_priors),
                               dtype=torch.long).to(device)  # [N, n_priors]

    # For each image of the batch
    for i in range(batch_size):
        n_objects = boxes[i].size(0)

        overlap = utils.find_jaccard_overlap(
            boxes[i], self.priors_xy)  # [n_objects, n_priors]

        # For each prior, find the object with the maximum overlap
        overlap_for_each_prior, object_for_each_prior = overlap.max(
            dim=0)  # [n_priors]

        # Problem: suppose some objects lie near each other. One of them may
        # not have a good overlap with any prior, so it would not appear in
        # object_for_each_prior at all.
        # First, find the prior with the maximum overlap for each object
        _, prior_for_each_object = overlap.max(dim=1)  # [n_objects]

        # Then, assign each object to its maximum-overlap prior
        object_for_each_prior[prior_for_each_object] = torch.LongTensor(
            range(n_objects)).to(device)

        # And give these priors full overlap so they are never set to background
        overlap_for_each_prior[prior_for_each_object] = 1.

        # Get the label for each prior
        label_for_each_prior = labels[i][object_for_each_prior]  # [n_priors]

        # Set as background every prior whose overlap is below the threshold
        label_for_each_prior[
            overlap_for_each_prior < self.threshold] = 0  # [n_priors]

        # Append the image's ground truth to the batch ground truth
        true_classes[i] = label_for_each_prior

        # Encode coordinates from (xmin, ymin, xmax, ymax) to center-offset form
        true_locs[i] = utils.cxcy_to_gcxgcy(
            utils.xy_to_cxcy(boxes[i][object_for_each_prior]),
            self.priors_cxcy)  # [n_priors, 4]

    # Identify priors that are positive (object/non-background)
    positive_priors = true_classes != 0  # [N, n_priors]

    # Localization loss is computed only over positive (non-background) priors
    loc_loss = self.l1_loss(predicted_locs[positive_priors],
                            true_locs[positive_priors])  # scalar

    # Confidence loss is computed over the positive priors and the most
    # difficult negative priors of each image:
    #     n_negatives = neg_pos_ratio * n_positives
    # taking the n_negatives priors with maximum loss. This is called Hard
    # Negative Mining: it concentrates on the hardest negatives of each image
    # to reduce the positive/negative imbalance.

    # Number of positive and hard-negative priors per image
    n_positives = positive_priors.sum(dim=1)  # [N]
    n_hard_negatives = self.neg_pos_ratio * n_positives  # [N]

    # Compute the confidence loss for all priors
    conf_loss_all = self.cross_entropy_loss(
        predicted_scores.view(-1, n_classes),
        true_classes.view(-1))  # [N * n_priors]
    conf_loss_all = conf_loss_all.view(batch_size, n_priors)  # [N, n_priors]

    # Loss of the positive priors
    conf_loss_pos = conf_loss_all[positive_priors]  # [n_positives]

    # Loss of the hard-negative priors
    conf_loss_neg = conf_loss_all.clone()  # [N, n_priors]
    conf_loss_neg[positive_priors] = 0  # ignore positive priors
    # sort by decreasing loss
    conf_loss_neg, _ = conf_loss_neg.sort(dim=1, descending=True)
    hardness_ranks = torch.LongTensor(
        range(n_priors)).unsqueeze(0).expand_as(conf_loss_neg).to(
            device)  # [N, n_priors]
    hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(1)
    conf_loss_hard_neg = conf_loss_neg[hard_negatives]

    # As in the paper, average the confidence loss over positive priors only
    conf_loss = (conf_loss_pos.sum() +
                 conf_loss_hard_neg.sum()) / n_positives.sum().float()

    # Final loss
    return conf_loss + self.alpha * loc_loss
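# ---------------------------------------------------------------------------
# Sketch (not from the original file): the hard-negative-mining rank trick
# used in forward() above, on dummy numbers. After sorting the per-prior
# losses in descending order, a prior is a hard negative iff its rank is
# below neg_pos_ratio * n_positives for its image.
import torch

conf_loss_neg = torch.tensor([[0.9, 0.1, 0.5, 0.3],
                              [0.2, 0.8, 0.4, 0.6]])  # [N=2, n_priors=4]
n_hard_negatives = torch.tensor([2, 1])               # per-image budgets

conf_loss_neg, _ = conf_loss_neg.sort(dim=1, descending=True)
hardness_ranks = torch.arange(
    conf_loss_neg.size(1)).unsqueeze(0).expand_as(conf_loss_neg)  # [N, n_priors]
hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(1)
print(conf_loss_neg[hard_negatives])  # tensor([0.9000, 0.5000, 0.8000])
# ---------------------------------------------------------------------------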
def detect_objects(self, pred_locs, pred_score, min_score, max_overlap, top_k):
    batch_size = pred_locs.size(0)
    n_priors = self.priors_cxcy.size(0)

    pred_score = F.softmax(pred_score, dim=2)  # [N, n_priors, n_classes]

    # lists to store the predictions of all images
    batch_boxes = list()
    batch_labels = list()
    batch_scores = list()

    # for each image of the batch
    for i in range(batch_size):
        # decode from regression (offset) format to bounding-box format
        decoded_locs = utils.gcxgcy_to_cxcy(pred_locs[i], self.priors_cxcy)
        decoded_locs = utils.cxcy_to_xy(decoded_locs)

        # lists to store the predictions of one image
        image_boxes = list()
        image_labels = list()
        image_scores = list()

        # for each class (skipping the background class 0)
        for c in range(1, self.num_classes):
            # keep only predictions with scores above the threshold
            class_scores = pred_score[i][:, c]  # [n_priors] (FloatTensor)
            score_above_min = class_scores > min_score  # [n_priors] (BoolTensor)
            n_score_above_min = score_above_min.sum().item()

            if n_score_above_min == 0:
                continue

            class_scores = class_scores[
                score_above_min]  # [n_qualified] (n_qualified <= n_priors)
            class_decoded_locs = decoded_locs[
                score_above_min]  # [n_qualified, 4]

            # overlap between every pair of predicted boxes
            overlap = utils.find_jaccard_overlap(class_decoded_locs,
                                                 class_decoded_locs)

            # Non-maximum suppression
            suppress = torch.zeros(n_score_above_min,
                                   dtype=torch.bool).to(device)
            for box in range(class_decoded_locs.size(0)):
                # skip boxes that have already been suppressed
                if suppress[box]:
                    continue
                suppress = suppress | (overlap[box] > max_overlap)
                suppress[box] = False  # a box never suppresses itself

            image_boxes.append(class_decoded_locs[~suppress])
            image_labels.append(
                torch.LongTensor((~suppress).sum().item() * [c]).to(device))
            image_scores.append(class_scores[~suppress])

        # if no object was found, assign a background box to the image
        if len(image_boxes) == 0:
            image_boxes.append(torch.FloatTensor([[0, 0, 1, 1]]).to(device))
            image_labels.append(torch.LongTensor([0]).to(device))
            image_scores.append(torch.FloatTensor([0]).to(device))

        # concatenate into single tensors
        image_boxes = torch.cat(image_boxes, dim=0)
        image_labels = torch.cat(image_labels, dim=0)
        image_scores = torch.cat(image_scores, dim=0)

        # keep only the top_k highest-scoring detections
        if image_scores.size(0) > top_k:
            image_scores, sort_ind = image_scores.sort(dim=0, descending=True)
            image_scores = image_scores[:top_k]
            image_boxes = image_boxes[sort_ind][:top_k]
            image_labels = image_labels[sort_ind][:top_k]

        batch_boxes.append(image_boxes)
        batch_labels.append(image_labels)
        batch_scores.append(image_scores)

    return batch_boxes, batch_labels, batch_scores
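# ---------------------------------------------------------------------------
# Sketch (not from the original file): the suppression loop of
# detect_objects() as a standalone NMS, reusing the iou_xyxy helper sketched
# earlier. Unlike the method above, this version sorts by score first, which
# is the usual convention; each surviving box then suppresses every box that
# overlaps it by more than max_overlap.
import torch

def nms(boxes_xyxy, scores, max_overlap=0.5):
    order = scores.argsort(descending=True)
    boxes_xyxy, scores = boxes_xyxy[order], scores[order]
    overlap = iou_xyxy(boxes_xyxy, boxes_xyxy)
    suppress = torch.zeros(boxes_xyxy.size(0), dtype=torch.bool)
    for box in range(boxes_xyxy.size(0)):
        if suppress[box]:
            continue
        suppress = suppress | (overlap[box] > max_overlap)
        suppress[box] = False  # a box never suppresses itself
    return boxes_xyxy[~suppress], scores[~suppress]

boxes = torch.FloatTensor([[0.00, 0.00, 0.50, 0.50],
                           [0.05, 0.05, 0.55, 0.55],   # near-duplicate of box 0
                           [0.60, 0.60, 0.90, 0.90]])
scores = torch.FloatTensor([0.9, 0.8, 0.7])
print(nms(boxes, scores))  # keeps boxes 0 and 2, drops the duplicate
# ---------------------------------------------------------------------------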
def main():
    image_width = 224
    image_height = 224

    base_pretrained = None
    num_classes = 21

    # build the detector
    model = SSD_MobileNet.SSDMobileNet(base_pretrained, num_classes)
    model = model.to(device)

    # get the priors of the model
    priors_boxes_cxcy = model.create_prior()
    priors_boxes_xy = utils.cxcy_to_xy(priors_boxes_cxcy)

    # ------------------------
    # Dataloaders
    # ------------------------
    #data_folder = "/home/feaf-seat-1/Documents/nesvera/object_detection/a-PyTorch-Tutorial-to-Object-Detection"
    data_folder = "/home/nesvera/Documents/neural_nets/object_detection/a-PyTorch-Tutorial-to-Object-Detection"

    train_dataset = datasets.PascalVOCDataset(data_folder,
                                              split='train',
                                              keep_difficult=True)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=1,
        shuffle=True,
        collate_fn=train_dataset.collate_fn,
        num_workers=2,
        pin_memory=True)

    for i in range(len(train_dataset)):
        print("priors: ", priors_boxes_cxcy.size())

        (images, boxes, labels, _) = train_dataset[i]
        image = images.permute(1, 2, 0).numpy()

        boxes_cx_cy = utils.xy_to_cxcy(boxes)
        boxes_xy = utils.cxcy_to_xy(boxes_cx_cy)

        true_locs = torch.zeros((1, boxes_xy.size(0), 4),
                                dtype=torch.float).to(device)
        true_classes = torch.zeros((1, boxes_xy.size(0)),
                                   dtype=torch.long).to(device)

        n_objects = boxes.size(0)

        overlap = utils.find_jaccard_overlap(
            boxes, priors_boxes_xy)  # [n_objects, n_priors]

        # For each prior, find the object with the maximum overlap
        overlap_for_each_prior, object_for_each_prior = overlap.max(
            dim=0)  # [n_priors]

        # Problem: suppose some objects lie near each other. One of them may
        # not have a good overlap with any prior, so it would not appear in
        # object_for_each_prior at all.
        # First, find the prior with the maximum overlap for each object
        _, prior_for_each_object = overlap.max(dim=1)  # [n_objects]

        # Then, assign each object to its maximum-overlap prior
        object_for_each_prior[prior_for_each_object] = torch.LongTensor(
            range(n_objects)).to(device)

        # And give these priors full overlap
        overlap_for_each_prior[prior_for_each_object] = 1.

        # Encode coordinates from (xmin, ymin, xmax, ymax) to center-offset form
        true_locs = utils.cxcy_to_gcxgcy(
            utils.xy_to_cxcy(boxes[object_for_each_prior]),
            priors_boxes_cxcy)  # [n_priors, 4]
        true_locs = true_locs.unsqueeze(0)

        # uniform class scores, just to exercise detect_objects
        pred_cls = torch.ones((priors_boxes_xy.size(0), 21),
                              dtype=torch.float).to(device)
        pred_cls = pred_cls.unsqueeze(0)

        det_boxes, det_labels, det_scores = model.detect_objects(
            true_locs, pred_cls, min_score=0, max_overlap=0.5, top_k=200)

        # draw the ground-truth bounding boxes
        for j in range(boxes.shape[0]):
            p0 = (int(boxes[j, 0] * image_width),
                  int(boxes[j, 1] * image_height))
            p1 = (int(boxes[j, 2] * image_width),
                  int(boxes[j, 3] * image_height))
            image = cv2.rectangle(image, p0, p1, (255, 0, 0), 3)

        # cv2.rectangle may return a cv2.UMat here; convert back to numpy
        image = image.get()

        decoded_locs = det_boxes[0]

        # draw every 100th decoded box so the display stays readable
        for k in range(0, decoded_locs.size(0), 100):
            p0 = (int(decoded_locs[k, 0] * image_width),
                  int(decoded_locs[k, 1] * image_height))
            p1 = (int(decoded_locs[k, 2] * image_width),
                  int(decoded_locs[k, 3] * image_height))
            img = cv2.rectangle(image.copy(), p0, p1, (0, 255, 0), 1)

            cv2.imshow("image", img)
            cv2.waitKey(0)
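# ---------------------------------------------------------------------------
# Sketch (not from the original file): the box encodings the debug script
# above relies on, assuming utils.* follows the conventions of the referenced
# a-PyTorch-Tutorial-to-Object-Detection repo (offsets scaled by the
# empirical variance factors 10 and 5). A correct pair of transforms
# round-trips exactly.
import torch

def xy_to_cxcy(xy):
    # (xmin, ymin, xmax, ymax) -> (cx, cy, w, h)
    return torch.cat([(xy[:, 2:] + xy[:, :2]) / 2, xy[:, 2:] - xy[:, :2]], 1)

def cxcy_to_xy(cxcy):
    # (cx, cy, w, h) -> (xmin, ymin, xmax, ymax)
    return torch.cat([cxcy[:, :2] - cxcy[:, 2:] / 2,
                      cxcy[:, :2] + cxcy[:, 2:] / 2], 1)

def cxcy_to_gcxgcy(cxcy, priors_cxcy):
    # encode boxes as offsets relative to their priors
    return torch.cat(
        [(cxcy[:, :2] - priors_cxcy[:, :2]) / (priors_cxcy[:, 2:] / 10),
         torch.log(cxcy[:, 2:] / priors_cxcy[:, 2:]) * 5], 1)

def gcxgcy_to_cxcy(gcxgcy, priors_cxcy):
    # decode offsets back to absolute boxes
    return torch.cat(
        [gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2],
         torch.exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]], 1)

boxes_xy = torch.FloatTensor([[0.1, 0.2, 0.4, 0.6]])
priors = torch.FloatTensor([[0.3, 0.5, 0.4, 0.5]])  # (cx, cy, w, h)
encoded = cxcy_to_gcxgcy(xy_to_cxcy(boxes_xy), priors)
decoded = cxcy_to_xy(gcxgcy_to_cxcy(encoded, priors))
assert torch.allclose(decoded, boxes_xy, atol=1e-6)  # lossless round trip
# ---------------------------------------------------------------------------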