Example #1
    def __call__(self, img, box=None, label=None, mask=None, **kwargs):
        '''
        img: a PIL image
        box: bounding boxes, an array of dimensions (#objects, 4)
        label: labels of the objects, an array of dimensions (#objects)
        mask: optional segmentation mask, a PIL image

        Out: cropped image, new boxes, new labels, new mask
        '''
      
        image = TF.to_tensor(img)
        masks = TF.to_tensor(mask) if mask is not None else mask
        original_h = image.size(1)
        original_w = image.size(2)

        while True:
            mode = random.choice(self.ratios)

            if mode is None:
                return {
                    'img': img,
                    'box': box,
                    'label': label,
                    'mask': mask}

            if box is not None:
                boxes = change_box_order(box, 'xywh2xyxy')
                boxes = torch.FloatTensor(boxes)
                labels = torch.LongTensor(label)
            else:
                boxes = None
                labels = None
            
                
            new_image = image
            new_boxes = boxes
            new_labels = labels
            new_masks = masks

            for _ in range(50):
                # Crop dimensions: [0.3, 1] of original dimensions
                new_h = random.uniform(0.3*original_h, original_h)
                new_w = random.uniform(0.3*original_w, original_w)

                # Aspect ratio constraint: keep h/w between 0.5 and 2
                if new_h / new_w < 0.5 or new_h / new_w > 2:
                    continue

                # Crop coordinates
                left = random.uniform(0, original_w - new_w)
                right = left + new_w
                top = random.uniform(0, original_h - new_h)
                bottom = top + new_h
                crop = torch.FloatTensor([int(left), int(top), int(right), int(bottom)])

                # Calculate IoU between the crop and the bounding boxes
                if boxes is not None:
                    overlap = find_jaccard_overlap(crop.unsqueeze(0), boxes)  # (1, #objects)
                    overlap = overlap.squeeze(0)
                    # If no bounding box has an IoU greater than the minimum, try again
                    if overlap.max().item() < mode:
                        continue

                # Crop the image (and the mask, if present)
                new_image = image[:, int(top):int(bottom), int(left):int(right)]  # (3, new_h, new_w)
                new_masks = masks[:, int(top):int(bottom), int(left):int(right)] if masks is not None else masks

                # Centers of the bounding boxes
                if boxes is not None:
                    center_bb = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                    # Keep bounding boxes whose centers lie inside the crop
                    center_in_crop = ((center_bb[:, 0] > left) * (center_bb[:, 0] < right) *
                                      (center_bb[:, 1] > top) * (center_bb[:, 1] < bottom))  # (#objects)

                    if not center_in_crop.any():
                        continue

                    # Keep only the boxes and labels whose centers survived
                    new_boxes = boxes[center_in_crop, :]
                    new_labels = labels[center_in_crop]

                    # Clip the top-left corner to the crop, then shift to crop coordinates
                    new_boxes[:, :2] = torch.max(new_boxes[:, :2], crop[:2])
                    new_boxes[:, :2] -= crop[:2]

                    # Clip the bottom-right corner to the crop, then shift to crop coordinates
                    new_boxes[:, 2:] = torch.min(new_boxes[:, 2:], crop[2:])
                    new_boxes[:, 2:] -= crop[:2]
                
                    new_boxes = change_box_order(new_boxes, 'xyxy2xywh')
                    new_boxes = new_boxes.numpy()
                    new_labels = new_labels.numpy()
                else:
                    new_boxes = None

                new_masks = TF.to_pil_image(new_masks) if new_masks is not None else None

                return {
                        'img': TF.to_pil_image(new_image),
                        'box': new_boxes,
                        'label': new_labels,
                        'mask': new_masks}
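The transform above leans on two helpers, change_box_order and find_jaccard_overlap, that are defined elsewhere in the repo. A minimal sketch of plausible implementations (assumptions: boxes are (N, 4) tensors, and 'xywh' means top-left corner plus width/height):

import torch

def change_box_order(boxes, order):
    '''Convert between (x, y, w, h) and (x_min, y_min, x_max, y_max) orders.'''
    boxes = torch.as_tensor(boxes, dtype=torch.float)
    if order == 'xywh2xyxy':
        return torch.cat([boxes[:, :2], boxes[:, :2] + boxes[:, 2:]], dim=1)
    elif order == 'xyxy2xywh':
        return torch.cat([boxes[:, :2], boxes[:, 2:] - boxes[:, :2]], dim=1)
    raise ValueError('unknown order: ' + order)

def find_jaccard_overlap(set_1, set_2):
    '''Pairwise IoU of two sets of xyxy boxes: (n1, 4) x (n2, 4) -> (n1, n2).'''
    lower = torch.max(set_1[:, None, :2], set_2[None, :, :2])  # (n1, n2, 2)
    upper = torch.min(set_1[:, None, 2:], set_2[None, :, 2:])  # (n1, n2, 2)
    inter = (upper - lower).clamp(min=0).prod(dim=2)           # (n1, n2)
    area_1 = (set_1[:, 2:] - set_1[:, :2]).prod(dim=1)         # (n1,)
    area_2 = (set_2[:, 2:] - set_2[:, :2]).prod(dim=1)         # (n2,)
    return inter / (area_1[:, None] + area_2[None, :] - inter)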
Example #2
    def forward(self, predicted_locs, predicted_scores, boxes, labels):

        batch_size = predicted_locs.size(0)
        n_priors = self.priors_cxcy.size(0)
        n_classes = predicted_scores.size(2)

        true_locs = torch.zeros(
            (batch_size, n_priors, 4),
            dtype=torch.float).to(device)  # [N, n_priors, 4]
        true_classes = torch.zeros(
            (batch_size, n_priors),
            dtype=torch.long).to(device)  # [N, n_priors]

        # For each image of the batch
        for i in range(batch_size):
            n_objects = boxes[i].size(0)

            overlap = utils.find_jaccard_overlap(
                boxes[i], self.priors_xy)  # [n_objects, n_priors]

            # create a vector with an object that has the max overlap for each prior
            overlap_for_each_prior, object_for_each_prior = overlap.max(
                dim=0)  # [n_priors]

            # Problem: suppose some objects lie close to each other. One of them
            # may not be the best match for any prior, and so it would never
            # appear in object_for_each_prior. The two steps below fix this.

            # First, find the prior that has the maximum overlap for each object
            _, prior_for_each_object = overlap.max(dim=1)  # [n_objects]

            # Then, assign each object to the corresponding maximum overlap prior
            object_for_each_prior[prior_for_each_object] = torch.LongTensor(
                range(n_objects)).to(device)

            # Then, assign maximum overlap for these objects
            overlap_for_each_prior[prior_for_each_object] = 1.

            # Get the labels for each prior
            label_for_each_prior = labels[i][
                object_for_each_prior]  # [n_priors]

            # Set as background all priors whose overlap is below the threshold
            label_for_each_prior[
                overlap_for_each_prior < self.threshold] = 0  # [n_priors]

            # Append image ground truth to the batch ground truth
            true_classes[i] = label_for_each_prior

            # Encode coordinates from xmin,ymin,xmax,ymax to center-offset
            true_locs[i] = utils.cxcy_to_gcxgcy(
                utils.xy_to_cxcy(boxes[i][object_for_each_prior]),
                self.priors_cxcy)  # [n_priors, 4]

        # Identify priors that are positive (object/non-background)
        positive_priors = true_classes != 0  # [N, n_priors]

        # Localization loss is computed only with positive (non-background) priors
        loc_loss = self.l1_loss(predicted_locs[positive_priors],
                                true_locs[positive_priors])  # scalar value

        # Confidence loss is computed over positive priors and the most difficult negative priors in each image.
        # n_negative = neg_pos_ratio * n_positives
        # Take the n_negative priors with maximum loss
        # This is called Hard Negative Mining. It concentrates on hardest negatives in each image to minimize
        # the pos/neg imbalance

        # Number of positive and hard-negative priors per image
        n_positives = positive_priors.sum(dim=1)  # [N]
        n_hard_negatives = self.neg_pos_ratio * n_positives  # [N]

        # Calculate the loss for all priors
        conf_loss_all = self.cross_entropy_loss(
            predicted_scores.view(-1, n_classes),
            true_classes.view(-1))  # [N * n_priors]
        conf_loss_all = conf_loss_all.view(batch_size,
                                           n_priors)  # [N, n_priors]

        # Get positive priors
        conf_loss_pos = conf_loss_all[positive_priors]  # [n_positives]

        # Get hard-negative priors
        conf_loss_neg = conf_loss_all.clone()  # [N, n_priors]

        # ignore positive priors
        conf_loss_neg[positive_priors] = 0

        # sort based in the loss
        conf_loss_neg, _ = conf_loss_neg.sort(dim=1, descending=True)

        hardness_ranks = torch.LongTensor(
            range(n_priors)).unsqueeze(0).expand_as(conf_loss_neg).to(
                device)  # [N, n_priors]
        hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(1)
        conf_loss_hard_neg = conf_loss_neg[hard_negatives]

        conf_loss = (conf_loss_pos.sum() +
                     conf_loss_hard_neg.sum()) / n_positives.sum().float()

        # Final loss
        return (conf_loss + self.alpha * loc_loss)
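forward also relies on the coordinate helpers from utils. For reference, a sketch of the standard SSD center-offset encoding, consistent with the a-PyTorch-Tutorial-to-Object-Detection repo referenced below (the variance constants 10 and 5 are that tutorial's defaults, an assumption here):

import torch

def xy_to_cxcy(xy):
    # (x_min, y_min, x_max, y_max) -> (c_x, c_y, w, h)
    return torch.cat([(xy[:, 2:] + xy[:, :2]) / 2,
                      xy[:, 2:] - xy[:, :2]], dim=1)

def cxcy_to_xy(cxcy):
    # (c_x, c_y, w, h) -> (x_min, y_min, x_max, y_max)
    return torch.cat([cxcy[:, :2] - cxcy[:, 2:] / 2,
                      cxcy[:, :2] + cxcy[:, 2:] / 2], dim=1)

def cxcy_to_gcxgcy(cxcy, priors_cxcy):
    # Encode boxes as offsets relative to the priors (SSD regression targets)
    return torch.cat([(cxcy[:, :2] - priors_cxcy[:, :2]) / (priors_cxcy[:, 2:] / 10),
                      torch.log(cxcy[:, 2:] / priors_cxcy[:, 2:]) * 5], dim=1)

def gcxgcy_to_cxcy(gcxgcy, priors_cxcy):
    # Inverse of cxcy_to_gcxgcy: decode predicted offsets back to boxes
    return torch.cat([gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2],
                      torch.exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]], dim=1)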
Example #3
    def detect_objects(self, pred_locs, pred_score, min_score, max_overlap,
                       top_k):

        batch_size = pred_locs.size(0)
        n_priors = self.priors_cxcy.size(0)

        pred_score = F.softmax(pred_score, dim=2)  # [N, n_priors, n_classes]

        # lists to store predictions for all images
        batch_boxes = list()
        batch_labels = list()
        batch_scores = list()

        # for each image of the batch
        for i in range(batch_size):

            # Decoded from regression format to bounding box format
            decoded_locs = utils.gcxgcy_to_cxcy(pred_locs[i], self.priors_cxcy)
            decoded_locs = utils.cxcy_to_xy(decoded_locs)

            # lists to store predictions for an image
            image_boxes = list()
            image_labels = list()
            image_scores = list()

            # for each class
            for c in range(1, self.num_classes):

                # keep only predictions with scores above threshold
                class_scores = pred_score[i][:, c]  # [n_priors] (FloatTensor)
                score_above_min = class_scores > min_score  # [n_priors] (BoolTensor)
                n_score_above_min = score_above_min.sum().item()

                if n_score_above_min == 0:
                    continue

                class_scores = class_scores[
                    score_above_min]  # [n_qualified] (n_qualified <= n_priors)
                class_decoded_locs = decoded_locs[
                    score_above_min]  # [n_qualified, 4]

                # Find overlap between each predicted box
                overlap = utils.find_jaccard_overlap(class_decoded_locs,
                                                     class_decoded_locs)

                # Non-maximum suppression
                suppress = torch.zeros((n_score_above_min),
                                       dtype=torch.bool).to(device)

                for box in range(class_decoded_locs.size(0)):

                    # skip boxes that have already been suppressed
                    if suppress[box]:
                        continue

                    # suppress every box that overlaps this one too much,
                    # but never the box itself (its IoU with itself is 1)
                    suppress = suppress | (overlap[box] > max_overlap)
                    suppress[box] = False

                image_boxes.append(class_decoded_locs[~suppress])
                image_labels.append(
                    torch.LongTensor(
                        (~suppress).sum().item() * [c]).to(device))
                image_scores.append(class_scores[~suppress])

            # if there is no object, assign background for the image
            if len(image_boxes) == 0:
                image_boxes.append(
                    torch.FloatTensor([[0, 0, 1, 1]]).to(device))
                image_labels.append(torch.LongTensor([0]).to(device))
                image_scores.append(torch.FloatTensor([0]).to(device))

            # Create a single tensor
            image_boxes = torch.cat(image_boxes, dim=0)
            image_labels = torch.cat(image_labels, dim=0)
            image_scores = torch.cat(image_scores, dim=0)

            if image_scores.size(0) > top_k:
                image_scores, sort_ind = image_scores.sort(dim=0,
                                                           descending=True)
                image_scores = image_scores[:top_k]
                image_boxes = image_boxes[sort_ind][:top_k]
                image_labels = image_labels[sort_ind][:top_k]

            batch_boxes.append(image_boxes)
            batch_labels.append(image_labels)
            batch_scores.append(image_scores)

        return batch_boxes, batch_labels, batch_scores
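One caveat: the suppression loop above visits boxes in prior order rather than in descending score order, so a low-scoring box can suppress a higher-scoring one. Standard greedy NMS sorts by score first; torchvision ships exactly that, and a minimal self-contained sketch of it looks like:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                      [1.0, 1.0, 11.0, 11.0],
                      [50.0, 50.0, 60.0, 60.0]])
scores = torch.tensor([0.9, 0.8, 0.7])

# nms sorts by score internally and drops any box whose IoU with an
# already-kept box exceeds the threshold
keep = nms(boxes, scores, iou_threshold=0.5)
print(keep)  # tensor([0, 2]): box 1 overlaps box 0 with IoU ~0.68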
def main():
    image_width = 224
    image_height = 224

    base_pretrained = None
    num_classes = 21

    # build detector
    model = SSD_MobileNet.SSDMobileNet(base_pretrained, num_classes)

    # get priors of the model
    model = model.to(device)
    priors_boxes_cxcy = model.create_prior()
    priors_boxes_xy = utils.cxcy_to_xy(priors_boxes_cxcy)

    # ------------------------
    #       Dataloaders
    # ------------------------
    #data_folder = "/home/feaf-seat-1/Documents/nesvera/object_detection/a-PyTorch-Tutorial-to-Object-Detection"
    data_folder = "/home/nesvera/Documents/neural_nets/object_detection/a-PyTorch-Tutorial-to-Object-Detection"
    train_dataset = datasets.PascalVOCDataset(data_folder,
                                              split='train',
                                              keep_difficult=True)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=1,
        shuffle=True,
        collate_fn=train_dataset.collate_fn,
        num_workers=2,
        pin_memory=True)

    for i in range(len(train_dataset)):

        print("priors: ", priors_boxes_cxcy.size())

        (images, boxes, labels, _) = train_dataset[i]

        image = images.permute(1, 2, 0).numpy()

        boxes_cx_cy = utils.xy_to_cxcy(boxes)
        boxes_xy = utils.cxcy_to_xy(boxes_cx_cy)

        true_locs = torch.zeros(
            (1, boxes_xy.size(0), 4),
            dtype=torch.float).to(device)  # [1, n_objects, 4]
        true_classes = torch.zeros(
            (1, boxes_xy.size(0)),
            dtype=torch.long).to(device)  # [1, n_objects]

        n_objects = boxes.size(0)

        overlap = utils.find_jaccard_overlap(
            boxes, priors_boxes_xy)  # [n_objects, n_priors]

        # create a vector with an object that has the max overlap for each prior
        overlap_for_each_prior, object_for_each_prior = overlap.max(
            dim=0)  # [n_priors]

        # Problem: suppose some objects lie close to each other. One of them
        # may not be the best match for any prior, and so it would never
        # appear in object_for_each_prior. The two steps below fix this.

        # First, find the prior that has the maximum overlap for each object
        _, prior_for_each_object = overlap.max(dim=1)  # [n_objects]

        # Then, assign each object to the corresponding maximum overlap prior
        object_for_each_prior[prior_for_each_object] = torch.LongTensor(
            range(n_objects)).to(device)

        # Then, assign maximum overlap for these objects
        overlap_for_each_prior[prior_for_each_object] = 1.

        # Encode coordinates from xmin,ymin,xmax,ymax to center-offset
        true_locs = utils.cxcy_to_gcxgcy(
            utils.xy_to_cxcy(boxes[object_for_each_prior]),
            priors_boxes_cxcy)  # [n_priors, 4]
        true_locs = true_locs.unsqueeze(0)

        pred_cls = torch.ones((priors_boxes_xy.size(0), 21),
                              dtype=torch.float).to(device)
        pred_cls = pred_cls.unsqueeze(0)

        det_boxes, det_labels, det_scores = model.detect_objects(
            true_locs, pred_cls, min_score=0, max_overlap=0.5, top_k=200)

        # bounding box
        for j in range(boxes.shape[0]):
            p0 = (int(boxes[j, 0] * image_width),
                  int(boxes[j, 1] * image_height))
            p1 = (int(boxes[j, 2] * image_width),
                  int(boxes[j, 3] * image_height))

            image = cv2.rectangle(image, p0, p1, (255, 0, 0), 3)

        # cv2.rectangle can return a cv2.UMat when handed a non-contiguous
        # array (as produced by permute().numpy()); .get() converts it back
        image = image.get()

        decoded_locs = det_boxes[0]

        # step through a sample of the decoded boxes for visual inspection
        k = 0
        while k < decoded_locs.size(0):

            p0 = (int(decoded_locs[k, 0] * image_width),
                  int(decoded_locs[k, 1] * image_height))
            p1 = (int(decoded_locs[k, 2] * image_width),
                  int(decoded_locs[k, 3] * image_height))

            img = cv2.rectangle(image.copy(), p0, p1, (0, 255, 0), 1)

            cv2.imshow("image", img)
            cv2.waitKey(0)

            k += 100
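Finally, a conventional entry-point guard (an assumption; the original file's footer is not shown in this excerpt):

if __name__ == "__main__":
    main()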