    def crop(self, inp):
        # Case for image input.
        if inp.shape == torch.Size([self.imgHeight, self.imgWidth, 3]):
            image = inp

            # Return a fixed 300x300 patch if no object was detected.
            if self.locations is None:
                return image[0:300, 0:300, :]

            # Randomly slide a 300x300 window and check its IoS against the
            # oracle bounding boxes so the crop contains at least one labeled item.
            found = False
            cnt = 0
            while not found:
                cnt += 1
                if cnt > 300:
                    # Give up searching and fall back to a fixed crop.
                    self.crop_coordinates = torch.Tensor([150, 0, 450, 300])
                    x1, y1, x2, y2 = (int(v) for v in self.crop_coordinates)
                    image = image[y1:y2, x1:x2, :]
                    break
                crop_x = random.randint(0, self.imgWidth - 300)
                self.crop_coordinates = torch.Tensor(
                    [crop_x, 0, crop_x + 300, 300])
                for location in self.locations:
                    if helper.ios(location,
                                  helper.corner2center(self.crop_coordinates)
                                  ) > self.cropping_ios_threshold:
                        found = True
                        x1, y1, x2, y2 = (int(v) for v in self.crop_coordinates)
                        image = image[y1:y2, x1:x2, :]
                        break

            return image

        # Case for location input.
        locations = inp
        # Shift the x centers into the crop's coordinate frame.
        locations[:, 0] -= self.crop_coordinates[0]

        # Keep only locations whose IoS with the 300x300 crop window
        # (center form: [150, 150, 300, 300]) is above the threshold.
        ios = helper.ios(locations, torch.Tensor([150, 150, 300, 300]))
        self.ios_index = ios > self.cropping_ios_threshold
        locations = locations[self.ios_index]

        # Clip the location.
        locations = helper.center2corner(locations)
        locations = torch.clamp(locations, 0, 300)
        locations = helper.corner2center(locations)

        # Save the oracle locations.
        self.locations = locations

        return locations

    def sanitize(self, item):
        confidences = []
        locations = []

        for obj in item['objects']:
            confidence = torch.zeros(len(self.classes))

            try:
                # list.index raises ValueError for labels not in self.classes,
                # so unknown objects are skipped by the handler below.
                confidence[self.classes.index(obj['label'])] = 1.0
                polygons = torch.Tensor(obj['polygon'])
                corner = [
                    min(polygons[:, 0]),
                    min(polygons[:, 1]),
                    max(polygons[:, 0]),
                    max(polygons[:, 1])
                ]
                corner = torch.stack(corner)

                # Add confidences and locations.
                confidences.append(confidence)
                locations.append(helper.corner2center(corner))
            except ValueError:
                pass

        # Fall back to the prior boxes if no labeled object was found.
        if len(confidences) == 0:
            confidences = torch.zeros([len(self.prior_bboxes)])
            locations = self.prior_bboxes
        else:
            confidences = torch.stack(confidences)
            locations = torch.stack(locations)

        return confidences, locations
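
All of these snippets lean on a small geometry helper module that is never shown. Below is a minimal sketch of what `helper.corner2center`, `helper.center2corner`, and `helper.ios` plausibly look like for the tensor call sites above; the exact signatures, and the reading of "ios" as intersection over the smaller area, are assumptions (Example #3 below uses a separate list-based numpy variant of the conversions, sketched there):

import torch

def corner2center(boxes):
    # (..., 4) corner form [x1, y1, x2, y2] -> center form [cx, cy, w, h].
    x1, y1, x2, y2 = boxes[..., 0], boxes[..., 1], boxes[..., 2], boxes[..., 3]
    return torch.stack([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1], dim=-1)

def center2corner(boxes):
    # (..., 4) center form [cx, cy, w, h] -> corner form [x1, y1, x2, y2].
    cx, cy, w, h = boxes[..., 0], boxes[..., 1], boxes[..., 2], boxes[..., 3]
    return torch.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], dim=-1)

def ios(locations, crop):
    # Fraction of each box's overlap relative to the smaller of the two areas,
    # for center-form `locations` ((N, 4) or (4,)) against one center-form
    # `crop` window. This reading of "ios" is an assumption.
    boxes = center2corner(locations.reshape(-1, 4))
    window = center2corner(crop)
    lt = torch.max(boxes[:, :2], window[:2])
    rb = torch.min(boxes[:, 2:], window[2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, 0] * wh[:, 1]
    area_box = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    area_win = (window[2] - window[0]) * (window[3] - window[1])
    return inter / torch.min(area_box, area_win)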
Example #3
    def generate_all_anchors(self, im_c, size):
        # Anchors for this image center and size are already cached.
        if self.image_center == im_c and self.size == size:
            return False
        self.image_center = im_c
        self.size = size

        # x of the top-left anchor, so the anchor grid is centered at im_c.
        a0x = im_c - size // 2 * self.stride
        ori = np.array([a0x] * 4, dtype=np.float32)
        zero_anchors = self.anchors + ori

        x1 = zero_anchors[:, 0]
        y1 = zero_anchors[:, 1]
        x2 = zero_anchors[:, 2]
        y2 = zero_anchors[:, 3]

        x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1),
                             [x1, y1, x2, y2])
        cx, cy, w, h = corner2center([x1, y1, x2, y2])

        disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride
        disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride

        cx = cx + disp_x
        cy = cy + disp_y

        # broadcast
        zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
        cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h])
        x1, y1, x2, y2 = center2corner([cx, cy, w, h])

        self.all_anchors = np.stack([x1, y1, x2, y2]), np.stack([cx, cy, w, h])
        return True
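
The displacement-and-broadcast trick above is easier to see on a tiny standalone case. A minimal numpy sketch with a single base anchor on a 4x4 grid at stride 8; the list-based corner2center/center2corner variants below are assumptions matching this snippet's call style:

import numpy as np

def corner2center(corner):
    # List-based variant: unpacks a list of [x1, y1, x2, y2] arrays.
    x1, y1, x2, y2 = corner
    return (x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1

def center2corner(center):
    cx, cy, w, h = center
    return cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2

stride, size = 8, 4
base = np.array([[-16, -8, 16, 8]], dtype=np.float32)      # one (1, 4) anchor
parts = [base[:, i].reshape(-1, 1, 1) for i in range(4)]
cx, cy, w, h = corner2center(parts)
cx = cx + np.arange(size).reshape(1, 1, -1) * stride       # shift centers in x
cy = cy + np.arange(size).reshape(1, -1, 1) * stride       # shift centers in y
zero = np.zeros((1, size, size), dtype=np.float32)
cx, cy, w, h = (v + zero for v in (cx, cy, w, h))          # broadcast w, h too
x1, y1, x2, y2 = center2corner([cx, cy, w, h])
print(x1.shape)  # (1, 4, 4): anchor_num x size x size, as in all_anchors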
Example #4
    def test_Match(self):
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        pp = np.asarray(
            [[0, 0, 2, 2], [1, 0, 3, 2], [0, 0, 1, 3], [3, 3, 4, 4]],
            dtype=np.float32)
        pp = corner2center(torch.Tensor(pp))
        gt = torch.Tensor([[1, 1, 1, 1]])
        test = iou(pp, gt)
        print('iou test', test)

        test_iou = np.asarray(
            [[0, 0, 0.2, 0.2, 0.6], [0.1, 0, 0.2, 0.8, 0.2],
             [0, 0.4, 0.1, 0.3, 0.3], [0.3, 0.3, 0.5, 0.9, 0.1]],
            dtype=np.float32)
        test_iou = torch.Tensor(test_iou)
        print('test argmax', torch.argmax(test_iou, dim=1))
        zero = torch.zeros(test_iou.shape)
        test_iou = torch.where(test_iou < 0.5, zero, test_iou)
        print(test_iou)
        gt_label = torch.tensor([1, 1, 2, 2])
        matched_label = torch.tensor([1, 3, 4, 2, 2])
        print('variable matched_label', matched_label, matched_label.dtype)
        zero_index = (torch.max(test_iou, dim=0)[0] == 0).nonzero()
        print('indices with max iou below 0.5', zero_index, zero_index.dtype,
              zero_index.view(1, -1))
        matched_label[zero_index.view(1, -1)] = 0
        print('after clearing out the zeros', matched_label)
        positive_sample_idx = matched_label.nonzero()
        matched_label[positive_sample_idx.view(1, -1)] = gt_label[
            matched_label[positive_sample_idx.view(1, -1)] - 1]
        print('non zero labels', positive_sample_idx,
              positive_sample_idx.dtype)
        print('final label', matched_label)
        print('test where', torch.max(test_iou, dim=0))
        for i in range(0, test_iou.shape[1]):
            if torch.max(test_iou[:, i]) < 0.5:
                print(i)
        timestamp = time.time()
        filename = 'ssd_net' + str(timestamp) + '.pth'
        print(filename)
Example #5
    def test_corner2(self):
        prior_layer_cfg = [{
            'layer_name': 'Conv5',
            'feature_dim_hw': (19, 19),
            'bbox_size': (60, 60),
            'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, '1t')
        }, {
            'layer_name': 'Conv11',
            'feature_dim_hw': (10, 10),
            'bbox_size': (105, 105),
            'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, '1t')
        }, {
            'layer_name': 'Conv14_2',
            'feature_dim_hw': (5, 5),
            'bbox_size': (150, 150),
            'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, '1t')
        }, {
            'layer_name': 'Conv15_2',
            'feature_dim_hw': (3, 3),
            'bbox_size': (195, 195),
            'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, '1t')
        }, {
            'layer_name': 'Conv16_2',
            'feature_dim_hw': (2, 2),
            'bbox_size': (240, 240),
            'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, '1t')
        }, {
            'layer_name': 'Conv17_2',
            'feature_dim_hw': (1, 1),
            'bbox_size': (285, 285),
            'aspect_ratio': (1.0, 1 / 2, 1 / 3, 2.0, 3.0, '1t')
        }]
        pp = generate_prior_bboxes(prior_layer_cfg)
        print('original', pp[0])
        # Round-tripping the two conversions should reproduce the input.
        test = center2corner(pp[0])
        print('corner', test)
        test = corner2center(test)
        print('center', test)
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        print('Pytorch CUDA Enabled?:', torch.cuda.is_available())
        b = 0.5 * torch.eye(3)
        b_gpu = b.cuda()
        print(b_gpu)
Example #6
    print(bbox_corner)
    print(conf_preds)
    print(bbox_corner.shape)
    conf_preds = conf_preds[0].detach()

    # idx = conf_preds[:, 2] > 0.6
    # bbox_corner = bbox_corner[idx]
    # bbox = bbox[idx]
    # print(bbox_corner)

    bbox_nms = bbox_helper.nms_bbox(bbox_corner, conf_preds)
    print(bbox_nms[0])
    bbox_nms = torch.Tensor(bbox_nms[0])
    print(bbox_nms.shape)
    bbox_nms_cen = bbox_helper.corner2center(bbox_nms)

    test_img = test_img.detach()
    channels = test_img.shape[1]
    h, w = test_img.shape[2], test_img.shape[3]

    # NCHW -> HWC for display; reshape(h, w, channels) would scramble the pixels.
    img_r = test_img.squeeze(0).permute(1, 2, 0)
    img_n = (img_r + 1) / 2

    fig, ax = plt.subplots(1)

    ax.imshow(img_n)

    for index in range(0, bbox_nms.shape[0]):
        corner = bbox_nms[index]
        corner = torch.mul(corner, 300)
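        # The example is truncated here; drawing each scaled box would
        # presumably follow, e.g. (an assumption, since matplotlib.patches is
        # not imported in the visible code):
        # rect = patches.Rectangle((corner[0], corner[1]),
        #                          corner[2] - corner[0],
        #                          corner[3] - corner[1],
        #                          fill=False, edgecolor='r')
        # ax.add_patch(rect)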
Example #7
    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """

        # 1. Load image as well as the bounding box with its label
        item = self.dataset_list[idx]
        img = Image.open(item['img_path'])
        label = item['label']
        bbox = item['bbox']
        bbox_arr = np.array(bbox).reshape(-1, 4)  # tuple to array

        # 2. Random crop to 1024*1024
        bbox_cropped = []
        label_cropped = []
        num_box_arr = len(bbox_arr)
        flag = False
        count = 0
        while not flag:
            count += 1
            crop_startX = random.uniform(0, 1) * 1024
            crop_size = 1024
            # If no good crop is found after 200 tries, fall back to the
            # first bounding box's position and width.
            if count == 200:
                crop_startX = bbox_arr[0][0]
                crop_size = bbox_arr[0][2] - bbox_arr[0][0]
                # print('bbox_arr 200',bbox_arr)
                # print('img_path',item['img_path'])
                # print('crop_startX',crop_startX)
                # print('crop_size',crop_size)
            for i in range(num_box_arr):
                # bamberg_000000_000441_gtCoarse_polygons.json contains a box
                # extending past the image edge; clamp it to the image width.
                if bbox_arr[i][2] > 2048:
                    bbox_arr[i][2] = 2048
                if (bbox_arr[i][0] >= crop_startX
                        and bbox_arr[i][2] <= crop_startX + crop_size):
                    flag = True
                    box = [
                        bbox_arr[i][0] - crop_startX, bbox_arr[i][1],
                        bbox_arr[i][2] - crop_startX, bbox_arr[i][3]
                    ]
                    bbox_cropped.append(box)
                    label_cropped.append(label[i])

        crop_pos = (crop_startX, 0, crop_startX + crop_size, crop_size)
        img_croped = img.crop(crop_pos)
        resized_size = 300
        img_resized = img_croped.resize((resized_size, resized_size))
        # img_resized.save("img300.jpg", "JPEG")
        bbox_resized = np.divide(bbox_cropped, crop_size / resized_size)

        # 3. Convert the bounding box from corner form (left-top, right-bottom): [(x,y), (x+w, y+h)] to
        #    center form: [(center_x, center_y, w, h)]
        bbox_center_form = bbox_helper.corner2center(
            torch.tensor(bbox_resized))

        # 4. Do the augmentation if needed. e.g. random clip the bounding box or flip the bounding box
        # Random horizontal flip.
        will_flip = random.uniform(0, 1)
        if will_flip > 0.5:
            # Mirror the x center coordinates.
            bbox_center_form[:, 0] = resized_size - bbox_center_form[:, 0]
            img_resized = img_resized.transpose(Image.FLIP_LEFT_RIGHT)
        # common.drawRectsPLT(img_resized, bbox_helper.center2corner(bbox_center_form), [int(i) for i in label_cropped])

        # Normalize the image (the PIL image is coerced to an ndarray here).
        img_normalized = (img_resized - self.mean) / self.std

        # 5. Normalize the bounding box position values from 0 to 1.
        sample_labels = torch.tensor(label_cropped, dtype=torch.float32)
        sample_bboxes = (bbox_center_form / resized_size).float()

        sample_img = np.asarray(img_normalized, dtype=np.float32)
        img_tensor = torch.from_numpy(sample_img)

        # 6. Do the matching prior and generate ground-truth labels as well as the boxes
        bbox_tensor, bbox_label_tensor = bbox_helper.match_priors(
            self.prior_bboxes, sample_bboxes, sample_labels, iou_threshold=0.5)

        # [DEBUG] check the output.
        assert isinstance(bbox_label_tensor, torch.Tensor)
        assert isinstance(bbox_tensor, torch.Tensor)
        assert bbox_tensor.dim() == 2
        assert bbox_tensor.shape[1] == 4
        assert bbox_label_tensor.dim() == 1
        assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]

        return bbox_tensor, bbox_label_tensor, img_tensor
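
match_priors itself is never shown in these examples. Below is a minimal sketch of the standard SSD matching step it presumably performs, built on a pairwise iou() like the one exercised in Example #4; every name and detail here is an assumption, not the repository's actual code:

import torch

def iou(boxes_a, boxes_b):
    # Pairwise IoU between (N, 4) and (M, 4) corner-form boxes -> (N, M).
    lt = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    rb = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    area_a = ((boxes_a[:, 2] - boxes_a[:, 0]) *
              (boxes_a[:, 3] - boxes_a[:, 1]))[:, None]
    area_b = ((boxes_b[:, 2] - boxes_b[:, 0]) *
              (boxes_b[:, 3] - boxes_b[:, 1]))[None, :]
    return inter / (area_a + area_b - inter)

def match_priors(priors, gt_boxes, gt_labels, iou_threshold=0.5):
    # For every prior, pick the best-overlapping ground truth; priors whose
    # best IoU falls below the threshold become background (label 0).
    overlaps = iou(priors, gt_boxes)             # (num_priors, num_gt)
    best_iou, best_idx = overlaps.max(dim=1)     # best gt per prior
    matched_boxes = gt_boxes[best_idx]
    matched_labels = gt_labels[best_idx].long()
    matched_labels[best_iou < iou_threshold] = 0
    return matched_boxes, matched_labels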

    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """


        # 1. Load image as well as the bounding box with its label
        item = self.dataset_list[idx]
        file_path = item['file_path']
        ground_truth = item['label']
        sample_labels = np.asarray(ground_truth[0], dtype=np.float32)
        sample_bboxes = np.asarray(ground_truth[1], dtype=np.float32)
        sample_img = Image.open(file_path)

        augmentation = np.random.randint(0, 4)
        sample_img, sample_bboxes, sample_labels = self.crop(
            sample_img, sample_bboxes, sample_labels)

        # random brightness
        if augmentation == 0:
            sample_img = ImageEnhance.Brightness(sample_img).enhance(
                np.random.randint(5, 25) / 10.0)

        # horizontal flip
        if augmentation == 1:
            sample_img = sample_img.transpose(Image.FLIP_LEFT_RIGHT)
            width = sample_img.size[0]
            flipped_boxes = sample_bboxes.copy()
            # Mirror the x coordinates; the left and right edges swap roles.
            sample_bboxes[:, 0] = width - flipped_boxes[:, 2]
            sample_bboxes[:, 2] = width - flipped_boxes[:, 0]

        # random blur or sharpen
        if augmentation == 2:
            if random.choice([True, False]):
                sample_img = sample_img.filter(ImageFilter.BLUR)
            else:
                sample_img = sample_img.filter(ImageFilter.SHARPEN)

        # if augmentation == 3:
        #     w, h = sample_img.size[:2]
        #     left = np.random.randint(0, np.min(sample_bboxes[:, 0])-(np.min(sample_bboxes[:, 0])/5).astype(int))
        #     # print("left---------------",left)
        #     top = np.random.randint(0, np.min(sample_bboxes[:, 1])-(np.min(sample_bboxes[:, 1])/5).astype(int))
        #     right = np.random.randint(np.max(sample_bboxes[:, 2])+((w-np.max(sample_bboxes[:, 2]))/5).astype(int), w)
        #     # print("right--------------",right)
        #     bottom = np.random.randint( np.max(sample_bboxes[:, 3])+((h-np.max(sample_bboxes[:, 3]))/5).astype(int), h)
        #     # print("bottom-------------",bottom)
        #
        #     sample_img = sample_img.crop((left, top, right, bottom))
        #     # print(sample_bboxes[0])
        #     # print("left", left)
        #     sample_bboxes = sample_bboxes - [float(left), float(top), float(left), float(top)]
        #     # print(sample_bboxes[0])

        # 2. Normalize the image with self.mean and self.std
        img = sample_img.resize((300, 300))
        img_array = np.asarray(img)
        img_array = (img_array - self.mean) / self.std
        h, w, c = img_array.shape

        # 3./4. Normalize the corner-form boxes to [0, 1] by the image size,
        #       then convert from corner form [(x1, y1), (x2, y2)] to center
        #       form (center_x, center_y, w, h).
        img_size = torch.Tensor([sample_img.size[0], sample_img.size[1],
                                 sample_img.size[0], sample_img.size[1]])
        sample_bboxes = torch.Tensor(sample_bboxes) / img_size
        sample_bboxes = corner2center(sample_bboxes)

        # 5. Do the matching prior and generate ground-truth labels as well as the boxes
        bbox_tensor, bbox_label_tensor = match_priors(
            self.prior_bboxes.cuda(), sample_bboxes.cuda(),
            torch.Tensor(sample_labels).cuda(), iou_threshold=0.45)


        img_tensor = torch.Tensor(img_array)
        # Convert HWC to CHW; view(c, h, w) would scramble the pixel layout.
        img_tensor = img_tensor.permute(2, 0, 1)
        # [DEBUG] check the output.
        assert isinstance(bbox_label_tensor, torch.Tensor)
        assert isinstance(bbox_tensor, torch.Tensor)
        assert bbox_tensor.dim() == 2
        assert bbox_tensor.shape[1] == 4
        assert bbox_label_tensor.dim() == 1
        assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]
        return img_tensor, bbox_tensor, bbox_label_tensor

    def __getitem__(self, idx):
        """
        Load the data from list, and match the ground-truth bounding boxes with prior bounding boxes.
        :return bbox_tensor: matched bounding box, dim: (num_priors, 4)
        :return bbox_label: matched classification label, dim: (num_priors)
        """

        # 1. Load image as well as the bounding box with its label
        # 2. Normalize the image with self.mean and self.std
        # 3. Convert the bounding box from corner form (left-top, right-bottom): [(x,y), (x+w, y+h)] to
        #    center form: [(center_x, center_y, w, h)]
        # 4. Normalize the bounding box position value from 0 to 1
        item = self.dataset_list[idx]
        image_path = item['image_path']
        labels = np.asarray(item['labels'])
        labels = torch.Tensor(labels).cuda()
        locations = torch.Tensor(item['bboxes']).cuda()

        image = Image.open(image_path)

        self.imgWidth, self.imgHeight = image.size
        self.resize_ratio = min(self.imgHeight / 300., self.imgWidth / 300.)

        locations = helper.corner2center(locations)

        image = self.resize(image)
        locations = self.resize(locations)

        # Crop the image first; this sets self.crop_coordinates for the
        # location crop below.
        image = self.crop(image)
        image = self.brighten(image)
        image = self.normalize(image)

        # Then apply the same crop to the locations (sets self.ios_index).
        locations = self.crop(locations)
        locations = self.normalize(locations)

        # Convert HWC to CHW; view would scramble the pixel layout.
        img_tensor = image.permute(2, 0, 1)
        img_tensor = img_tensor.cuda()

        # Keep only the labels whose boxes survived the crop.
        labels = labels[self.ios_index]

        # 4. Do the augmentation if needed. e.g. random clip the bounding box or flip the bounding box

        # 5. Do the matching prior and generate ground-truth labels as well as the boxes
        bbox_tensor, bbox_label_tensor = match_priors(
            self.prior_bboxes,
            helper.center2corner(locations),
            labels,
            iou_threshold=0.5)

        # [DEBUG] check the output.
        # assert isinstance(bbox_label_tensor, torch.Tensor)
        # assert isinstance(bbox_tensor, torch.Tensor)
        # assert bbox_tensor.dim() == 2
        # assert bbox_tensor.shape[1] == 4
        # assert bbox_label_tensor.dim() == 1
        # assert bbox_label_tensor.shape[0] == bbox_tensor.shape[0]
        return img_tensor, bbox_tensor, bbox_label_tensor
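
A hedged usage sketch for the datasets above; SSDDataset is a stand-in name, since the examples never show the enclosing class or its constructor, and the tuple order follows the last __getitem__:

from torch.utils.data import DataLoader

train_set = SSDDataset(dataset_list)  # hypothetical constructor
loader = DataLoader(train_set, batch_size=16, shuffle=True)
for img_tensor, bbox_tensor, bbox_label_tensor in loader:
    pass  # forward pass and MultiBox loss would go here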