Example #1
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------
        img_path = self.img_files[index % len(self.img_files)].rstrip()
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img = horisontal_flip(img)

        return img_path, img
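
Every example below calls a pad_to_square helper and then reads padding offsets out of pad. A minimal sketch consistent with how pad is indexed throughout (pad[0]/pad[1] are left/right, pad[2]/pad[3] are top/bottom, matching the hint quoted in Example #14); the name and signature follow the calls above, the body is an assumption:

import torch.nn.functional as F

def pad_to_square(img, pad_value):
    # img is a CHW tensor; pad the shorter side to make it square
    c, h, w = img.shape
    dim_diff = abs(h - w)
    # Split the difference between the two sides of the short axis
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    # F.pad takes (left, right, top, bottom) for the last two dims
    pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
    img = F.pad(img, pad, "constant", value=pad_value)
    return img, pad
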
Example #2
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------
        img_path = self.img_files[index % len(self.img_files)].rstrip()

        img_path = os.path.join("../data/vortox-annotation", img_path[1:])
        print("img_path=" + img_path)
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()
        label_path = os.path.join('../data/vortox-annotation', label_path[1:])
        print("label_path = {}".format(label_path))
        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
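
The horisontal_flip augmentation used by every example (the misspelling is the helper's real name here) flips the image along its width axis and mirrors the normalized x-center. A sketch, assuming the [idx, cls, x, y, w, h] target layout built above; the targets line matches the traceback quoted in Example #18:

import torch

def horisontal_flip(images, targets):
    # Flip left-right along the last (width) axis
    images = torch.flip(images, [-1])
    # Mirror the normalized x-center (column 2 of [idx, cls, x, y, w, h])
    targets[:, 2] = 1 - targets[:, 2]
    return images, targets
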
Example #3
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 9))
            a = torch.zeros((len(boxes), 4))
            boxes = torch.cat((boxes.float(), a), 1).double()

            # Extract coordinates for unpadded + unscaled image
            for i in [1, 3, 5, 7]:
                boxes[:, i] = (boxes[:, i] + pad[0]) / padded_w
            tmp = boxes[:, 1:8:2]
            x_max = torch.max(tmp, 1)[0]
            x_min = torch.min(tmp, 1)[0]
            boxes[:, 9] = (x_max + x_min) / 2
            boxes[:, 11] = x_max - x_min
            for i in [2, 4, 6, 8]:
                boxes[:, i] = (boxes[:, i] + pad[2]) / padded_h
            tmp = boxes[:, 2:9:2]
            y_max = torch.max(tmp, 1)[0]
            y_min = torch.min(tmp, 1)[0]
            boxes[:, 10] = (y_max + y_min) / 2
            boxes[:, 12] = y_max - y_min

            targets = torch.zeros((len(boxes), 14))
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
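
Example #3 reads four-corner quadrilateral labels (class, x1, y1, ..., x4, y4) and derives the enclosing axis-aligned box into columns 9-12. A small numeric check of that reduction (the values are made up for illustration):

import torch

# One quadrilateral: class 0, corners (0.2,0.3) (0.6,0.3) (0.6,0.7) (0.2,0.7)
quad = torch.tensor([[0, 0.2, 0.3, 0.6, 0.3, 0.6, 0.7, 0.2, 0.7]])
xs, ys = quad[:, 1:8:2], quad[:, 2:9:2]   # x and y corner columns
xc = (xs.max(1)[0] + xs.min(1)[0]) / 2    # tensor([0.4000])
yc = (ys.max(1)[0] + ys.min(1)[0]) / 2    # tensor([0.5000])
bw = xs.max(1)[0] - xs.min(1)[0]          # tensor([0.4000])
bh = ys.max(1)[0] - ys.min(1)[0]          # tensor([0.4000])
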
Example #4
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)  # if labels are not normalized, the scale factors are (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 11))

            # Extract coordinates for unpadded + unscaled image
            # x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            # y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            # x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            # y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # # Adjust for added padding
            # x1 += pad[0]
            # y1 += pad[2]
            # x2 += pad[1]
            # y2 += pad[3]
            # # Returns (x, y, w, h)
            # boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            # boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            # boxes[:, 3] *= w_factor / padded_w
            # boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 12))  # one extra column for the sample index within the batch
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
Example #5
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        if os.path.exists(label_path):
            # boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            boxes = np.loadtxt(label_path).reshape(-1, 5)
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            # Column 0 of targets is the image index within the batch; it is
            # used to associate anchor boxes with their image when computing the loss
            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = torch.FloatTensor(boxes)

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
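
The zero column that every example leaves at targets[:, 0] is filled in later, in the dataset's collate_fn, so the loss can tell which image each box belongs to. A hedged sketch of that pattern (the resize call and self.img_size are assumptions):

import torch

def collate_fn(self, batch):
    paths, imgs, targets = list(zip(*batch))
    # Drop samples whose label file was missing (targets is None)
    targets = [boxes for boxes in targets if boxes is not None]
    # Write the within-batch sample index into column 0
    for i, boxes in enumerate(targets):
        boxes[:, 0] = i
    targets = torch.cat(targets, 0)
    # Resize every padded square image to a common size and stack
    imgs = torch.stack([resize(img, self.img_size) for img in imgs])
    return paths, imgs, targets
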
Example #6
    def __getitem__(self, index):
        img, anno = super(JSONDataset, self).__getitem__(index)

        # ---------
        #  Image
        # ---------
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(img)

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        bboxes = torch.from_numpy(
            np.array([ann['bbox'] for ann in anno]).astype(np.float64))
        # Extract coordinates for unpadded + unscaled image
        x1 = bboxes[:, 0].clone()
        y1 = bboxes[:, 1].clone()
        x2 = bboxes[:, 0] + bboxes[:, 2]
        y2 = bboxes[:, 1] + bboxes[:, 3]
        # Adjust for added padding
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        bboxes[:, 0] = ((x1 + x2) / 2.) / padded_w
        bboxes[:, 1] = ((y1 + y2) / 2.) / padded_h
        bboxes[:, 2] = bboxes[:, 2] * 1. / padded_w
        bboxes[:, 3] = bboxes[:, 3] * 1. / padded_h

        targets = torch.zeros((len(bboxes), 6))
        targets[:, 1] = torch.from_numpy(
            np.array([
                self.json_category_id_to_contiguous_id[ann['category_id']]
                for ann in anno
            ]))
        targets[:, 2:] = bboxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        img_id = self.ids[index]
        return img_id, img, targets
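
Example #6 assumes a json_category_id_to_contiguous_id mapping, needed because COCO category ids are sparse while the model wants dense class indices. A plausible construction in __init__, under the assumption that the parent dataset exposes a pycocotools COCO object as self.coco:

# Hypothetical __init__ snippet; self.coco is assumed to be a COCO instance
self.json_category_id_to_contiguous_id = {
    cat_id: i for i, cat_id in enumerate(sorted(self.coco.getCatIds()))
}
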
Example #7
    def __getitem__(self, index):

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = transforms.ToTensor()(img)

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
Example #8
    def __getitem__(self, index):
        # img_path, img, targets = self.load_mosaic(index)

        img_path, img = self.load_image(index)
        targets = self.load_targets(index, img.numpy())
        img = resize(img, self.img_size)
        
        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
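
Example #8 leans on load_image/load_targets helpers that are not shown, plus a resize utility (also used in Example #16). A minimal sketch of resize, assuming nearest-neighbour interpolation on a CHW tensor:

import torch.nn.functional as F

def resize(image, size):
    # F.interpolate wants a batch dim; add it, resize, and strip it again
    image = F.interpolate(image.unsqueeze(0), size=size, mode="nearest").squeeze(0)
    return image
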
    def get_image(self, img_path, label_path):
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape
        # ---------
        #  Label
        # ---------
        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path, dtype=np.float32).reshape(-1, 5))
            #print ('boxes ', boxes)
            if boxes.size(0) != 0:
               # Extract coordinates for unpadded + unscaled image
               x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
               y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
               x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
               y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
               # Adjust for added padding
               x1 += pad[0]
               y1 += pad[2]
               x2 += pad[1]
               y2 += pad[3]
               # Returns (x, y, w, h)
               boxes[:, 1] = ((x1 + x2) / 2) / padded_w
               boxes[:, 2] = ((y1 + y2) / 2) / padded_h
               w_ = float(w_factor) / float(padded_w)
               boxes[:, 3] *= w_
               h_ = float(h_factor) / float(padded_h)
               boxes[:, 4] *= h_
            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes
               
        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)
        return img_path, img, targets
Example #10
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(self.load_images(index))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        boxes = torch.from_numpy(self.load_labels(index))
        targets = torch.zeros((len(boxes), 6))
        # Adjust for added padding
        x1, y1, x2, y2 = boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]
        x1 += pad[0]
        y1 += pad[2]
        x2 += pad[1]
        y2 += pad[3]
        # Returns (x, y, w, h)
        targets[:, 1] = boxes[:,0]
        targets[:, 2] = ((x1 + x2) / 2) / padded_w
        targets[:, 3] = ((y1 + y2) / 2) / padded_h
        targets[:, 4] = (x2 - x1) / padded_w
        targets[:, 5] = (y2 - y1) / padded_h

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        # No image path is available here; `_` (the channel count unpacked
        # from img.shape above) is returned as a placeholder
        return _, img, targets
Example #11
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        diff_mode = self.diff_mode
        if diff_mode != 0:
            diff_img_path = self.diff_img_files[index % len(
                self.diff_img_files)].rstrip()
            print("Path check", img_path, diff_img_path)
            diff_img = transforms.ToTensor()(
                Image.open(diff_img_path).convert('L'))
            print(img.dtype, diff_img.dtype)

        # mode 1: concatenate the diff image as a fourth channel
        if diff_mode == 1:
            img = torch.cat([img, diff_img], axis=0)
            print(img.shape)
        # mode 2: reweight the image using the diff map
        if diff_mode == 2:
            img = img * diff_img * 2 + img

        # Handle images with less than three channels
        # if len(img.shape) != 3:
        #     img = img.unsqueeze(0)
        #     img = img.expand((3, img.shape[1:]))

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
Example #12
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()

        img = lycon.load(img_path)

        # Handles images with less than three channels
        if len(img.shape) != 3:
            img = np.expand_dims(img, -1)
            img = np.repeat(img, 3, -1)

        h, w, _ = img.shape
        img, pad = pad_to_square(img, 127.5)
        padded_h, padded_w, _ = img.shape
        # Resize to target shape
        img = lycon.resize(img, height=self.img_size, width=self.img_size)
        # Channels-first and normalize
        img = torch.from_numpy(img).float().permute((2, 0, 1)) / 255.0

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        labels = None
        if os.path.exists(label_path):
            labels = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            # Extract coordinates for unpadded + unscaled image
            x1 = w * (labels[:, 1] - labels[:, 3] / 2)
            y1 = h * (labels[:, 2] - labels[:, 4] / 2)
            x2 = w * (labels[:, 1] + labels[:, 3] / 2)
            y2 = h * (labels[:, 2] + labels[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[1][0]
            y1 += pad[0][0]
            x2 += pad[1][1]
            y2 += pad[0][1]

            if self.is_training:
                # Returns (x, y, w, h)
                labels[:, 1] = ((x1 + x2) / 2) / padded_w
                labels[:, 2] = ((y1 + y2) / 2) / padded_h
                labels[:, 3] *= w / padded_w
                labels[:, 4] *= h / padded_h
            else:
                # Returns (x1, y1, x2, y2)
                labels[:, 1] = x1 * (self.img_size / padded_w)
                labels[:, 2] = y1 * (self.img_size / padded_h)
                labels[:, 3] = x2 * (self.img_size / padded_w)
                labels[:, 4] = y2 * (self.img_size / padded_h)

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, labels = horisontal_flip(img, labels)

        # Add dummy label if there are none
        num_labels = 1 if labels is None else len(labels)
        boxes = torch.zeros((num_labels, 6))
        if labels is not None:
            boxes[:, 1:] = labels

        return img_path, img, boxes
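
Note that Example #12 keeps the image as an HWC numpy array until the very end, so its pad_to_square must return numpy-style padding: it indexes pad[1][0] (left) and pad[0][0] (top), unlike the tensor-based examples. A sketch consistent with that indexing (the body is an assumption):

import numpy as np

def pad_to_square(img, pad_value):
    # HWC numpy variant; pad is ((top, bottom), (left, right), (0, 0))
    h, w, _ = img.shape
    dim_diff = abs(h - w)
    pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
    pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
    img = np.pad(img, pad, "constant", constant_values=pad_value)
    return img, pad
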
Example #13
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        label_path = self.label_files[index % len(self.img_files)].rstrip()

        # mosaic = True and self.augment  # 4 images
        # if mosaic:
        #     img, labels = load_mosaic(self, index)
        #     h, w, _ = img.shape
        # else:
        #     img = load_image(self, index)
        #     h, w, _ = img.shape

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # boxes[:, 0] => class id
            # Returns (xc, yc, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w  # new center x
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h  # new center y
            boxes[:, 3] *= w_factor / padded_w  # new width
            boxes[:, 4] *= h_factor / padded_h  # new height

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                # targets is an N x 6 array: column 0 is the batch-index column
                # (ignored here); columns 1-5 are the class and the box
                # center/size, normalized to the padded image
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
Example #14
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        img_path = self.img_files[index % len(self.img_files)]

        labels = self.label_files[index % len(self.label_files)]

        class_label = labels[4]
        x_min = labels[0]
        y_min = labels[1]
        x_max = labels[2]
        y_max = labels[3]

        # Adjust for added padding
        # pad = (0, 0, pad1, pad2) if h <= w else (pad1, pad2, 0, 0)
        x_min += pad[0]
        y_min += pad[2]
        x_max += pad[1]
        y_max += pad[3]

        x = ((x_min + x_max) / 2) / padded_w
        if x_max >= padded_w:
            return None, None, None
        y = ((y_min + y_max) / 2) / padded_h
        if y_max >= padded_h:
            return None, None, None
        w = (x_max - x_min) / padded_w
        h = (y_max - y_min) / padded_h

        # Numerical stability
        # if x + w/2 >= 1:
        #     w -= (x + w/2) - 1 + 1e-16
        # if y + h/2 >= 1:
        #     h -= (y + h/2) - 1 + 1e-16

        targets = torch.tensor([[0, class_label, x, y, w, h]],
                               dtype=torch.float)

        # print("X: ", x)
        # print("Y: ", y)
        # print("W: ", w)
        # print("H: ", h)

        # print("Far X: ", x + w/2)
        # print("Far y: ", y + h/2)

        # assert x + w/2 < 1
        # assert x - w/2 > 0
        # assert y + h/2 < 1
        # assert y - h/2 > 0

        # Improting Image class from PIL module

        # Cropped image of above dimension
        # (It will not change orginal image)

        # img = (img.numpy() * 255)[0]
        # print(img)
        # img = Image.fromarray(img)
        # print(img)
        # cropped_image = img.crop((x*padded_w - (w*padded_w) / 2, y*padded_h - (h*padded_h) / 2, x*padded_w + (w*padded_w) / 2, y*padded_h + (h*padded_h) / 2))
        # # Shows the image in image viewer
        # cropped_image.show()

        # assert False

        # targets = None
        # if os.path.exists(label_path):
        #     boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        #     # Extract coordinates for unpadded + unscaled image
        #     x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
        #     y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
        #     x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
        #     y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
        #     # Adjust for added padding
        #     x1 += pad[0]
        #     y1 += pad[2]
        #     x2 += pad[1]
        #     y2 += pad[3]
        #     # Returns (x, y, w, h)
        #     boxes[:, 1] = ((x1 + x2) / 2) / padded_w
        #     boxes[:, 2] = ((y1 + y2) / 2) / padded_h
        #     boxes[:, 3] *= w_factor / padded_w
        #     boxes[:, 4] *= h_factor / padded_h

        #     targets = torch.zeros((len(boxes), 6))
        #     targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
Example #15
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        img_path += '.jpg'
        img_path = self.img_path + img_path

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
        _, height, width = img.shape
        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                lines = f.readlines()

            rows = []
            for line in lines:
                parts = line.split(' ')
                label = parts[1]
                x_left, y_left, x_right, y_right = (int(v) for v in parts[2:6])

                olabel, x_mid, y_mid, x_length, y_length = self.get_arguments(
                    height, width, label, x_left, y_left, x_right, y_right)

                # Round to five decimals, as the original string formatting did
                rows.append([
                    float(olabel),
                    round(float(x_mid), 5),
                    round(float(y_mid), 5),
                    round(float(x_length), 5),
                    round(float(y_length), 5),
                ])

            boxes = torch.from_numpy(np.array(rows).reshape(-1, 5))

            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
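
Example #15 relies on a get_arguments method to turn pixel corner coordinates into normalized YOLO boxes. Its outputs are consumed as (olabel, x_mid, y_mid, x_length, y_length), which pins down what it must compute; a hedged sketch, where the class-name lookup table self.class_to_idx is an assumption:

def get_arguments(self, height, width, label, x_left, y_left, x_right, y_right):
    # Map the class name to an integer index (assumed lookup table)
    olabel = self.class_to_idx[label]
    # Normalized box center and extent from pixel corner coordinates
    x_mid = (x_left + x_right) / 2.0 / width
    y_mid = (y_left + y_right) / 2.0 / height
    x_length = (x_right - x_left) / width
    y_length = (y_right - y_left) / height
    return olabel, x_mid, y_mid, x_length, y_length
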
Example #16
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))
        
        # resize the image up front, before padding
        img = resize(img, 512)
        
        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
Example #17
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()  # path of the index-th image

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))  # read the image as RGB

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])  # replicate to three channels

        _, h, w = img.shape
        self.orl_size.append([h, w])
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)  # scale factors from the original image size
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)  # pad the image with zeros
        _, padded_h, padded_w = img.shape  # h, w after padding

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()  # path of the label file

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))  # boxes holds the label rows
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)  # x1, y1, x2, y2 from the original annotation
            # Adjust for added padding

            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]  # x1, y1, x2, y2 after padding
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w  # w, h relative to the padded image
            boxes[:, 4] *= h_factor / padded_h

            targets = torch.zeros((len(boxes), 6))  # one target row per box
            targets[:, 1:] = boxes  # columns 1-5 copy the box values

        # Apply augmentations
        if self.augment:  # no noise is added, only a random flip
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets  # image path, padded image, and targets
Example #18
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()

        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        targets = None
        boxes   = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h
            # One row per object found in the image; a leading index column
            # turns each row into [idx, class, x, y, w, h]
            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Apply augmentations
        if self.augment:
            if np.random.random() < 0.5:
                # While training on the VisDrone dataset this step sometimes
                # raised the following (now resolved):
                #   targets[:, 2] = 1 - targets[:, 2]
                #   TypeError: 'NoneType' object is not subscriptable
                try:
                    img, targets = horisontal_flip(img, targets)
                except TypeError:
                    print(f"datasets.py: img_path={img_path}")
                    print(f"targets={targets}")
                    print(f"boxes={boxes}")

        return img_path, img, targets
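
The try/except above papers over targets being None when a label file is empty or missing. A guard that avoids the exception entirely, flipping the bare image when there are no boxes to mirror (a sketch, not the original fix):

import numpy as np
import torch

def maybe_flip(img, targets, augment):
    # Random horizontal flip that tolerates samples without labels
    if augment and np.random.random() < 0.5:
        if targets is None:
            img = torch.flip(img, [-1])
        else:
            img, targets = horisontal_flip(img, targets)
    return img, targets
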
Example #19
    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        img_path = 'data/coco' + img_path
        # Extract image as PyTorch tensor
        img = transforms.ToTensor()(Image.open(img_path).convert('RGB'))

        # Handle images with less than three channels
        if len(img.shape) != 3:
            img = img.unsqueeze(0)
            img = img.expand(3, *img.shape[1:])

        _, h, w = img.shape
        '''
        If the labels are normalized, use the original image size as the scale factors
        '''
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)
        # Pad to square resolution
        img, pad = pad_to_square(img, 0)
        _, padded_h, padded_w = img.shape

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()
        label_path = 'data/coco' + label_path
        targets = None
        '''Convert labels normalized to the original image into labels normalized to the padded image'''
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
            # Extract coordinates for unpadded + unscaled image
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)
            # Adjust for added padding
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[1]
            y2 += pad[3]
            # Returns (x, y, w, h)
            '''Convert the raw label into normalized (x, y, w, h), where (x, y) is the box center'''
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h
            '''Columns 1-5 of targets hold columns 0-4 of boxes;
            column 0 of targets is filled with the sample index in the collate_fn below
            '''
            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Apply augmentations
        '''Data augmentation'''
        if self.augment:
            if np.random.random() < 0.5:
                img, targets = horisontal_flip(img, targets)

        return img_path, img, targets
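
Whichever __getitem__ variant is used, consumption looks the same. A hedged usage sketch; the ListDataset name, its constructor arguments, and the list-file path are assumptions:

from torch.utils.data import DataLoader

# Hypothetical dataset wrapping one of the __getitem__ variants above
dataset = ListDataset("data/coco/trainvalno5k.txt", augment=True)
dataloader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=dataset.collate_fn,  # fills targets[:, 0] with the sample index
)

for paths, imgs, targets in dataloader:
    # imgs: (B, 3, S, S) batch; targets: (N, 6) rows of [idx, cls, x, y, w, h]
    pass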