class Coco2017Dataset(torch.utils.data.Dataset):
    """COCO 2017 detection data served as per-object crops or whole images.

    Wraps ``CocoDetection`` for the ``train2017`` or ``val2017`` split.
    By default an item is a randomly sized crop around one randomly chosen
    annotated object, passed through the transform registered for the
    split and paired with that object's ``category_id``. With
    ``use_cropped=False`` the untransformed image and a label vector of
    length ``num_class`` are returned instead.
    """

    def __init__(
        self,
        root: str,
        data_type='train',
        num_class: int = 91,
        **kwargs: object
    ):
        """Open the requested split below ``root``.

        Args:
            root: Directory holding ``train2017``/``val2017`` and
                ``annotations/instances_<split>.json``.
            data_type: ``'train'`` selects ``train2017``; any other value
                selects ``val2017``. Also used as the key into the
                transforms returned by ``get_transforms()``.
            num_class: Length of the label vector built by ``__getitem__``
                when ``use_cropped`` is false.
            **kwargs: Accepted for call-site compatibility and ignored.
        """
        super().__init__()
        # presumably a mapping keyed by data_type; verify against get_transforms
        self.data_transforms = get_transforms()
        file_name = 'train2017' if data_type == 'train' else 'val2017'
        # Images without annotations are not filtered out here;
        # get_cropped_instance resamples when it lands on one.
        self.dataset = CocoDetection(
            root=osp.join(root, file_name),
            annFile=osp.join(root, f'annotations/instances_{file_name}.json'),
        )
        self.data_type = data_type
        self.num_class = num_class
        # Seeds NumPy's global generator, which drives the crop sampling.
        np.random.seed(256)

    def __getitem__(self, index: int, use_cropped: bool = True):
        """Return the sample at ``index``.

        Args:
            index: Position in the wrapped dataset.
            use_cropped: When true (the default) delegate to
                ``get_cropped_instance``.

        Returns:
            ``(transformed_crop, category_id)`` when ``use_cropped`` is
            true, otherwise ``(image, labels)`` where ``labels`` is an
            int64 tensor of length ``num_class``.
        """
        if use_cropped:
            return self.get_cropped_instance(index)

        image, targets = self.dataset[index]
        labels = torch.zeros(self.num_class, dtype=torch.int64)
        for target in targets:
            cat_id = target['category_id']
            # NOTE(review): the slot stores the category id itself, not 1;
            # confirm that this is what consumers expect.
            labels[cat_id] = cat_id
        return image, labels

    def get_cropped_instance(self, index: int):
        """Return a transformed crop around one random object and its class.

        If the image at ``index`` has no annotations, other indices are
        drawn at random until an annotated image is found (this never
        terminates if no image is annotated). The crop window is centred
        on the chosen box, is between one and two times the box size per
        axis, is clipped to the image and is then trimmed by one pixel on
        each side. Windows too thin to survive the trim are used
        untrimmed so the crop is never empty or inverted.

        Args:
            index: Preferred position in the wrapped dataset.

        Returns:
            ``(transformed_crop, category_id)``.
        """
        image, targets = self.dataset[index]
        while len(targets) == 0:
            index = np.random.randint(0, len(self.dataset))
            image, targets = self.dataset[index]

        target = targets[np.random.randint(0, len(targets))]
        category_id = target['category_id']

        # COCO boxes are [x, y, width, height]; fractional parts are dropped.
        bbox = np.array(target['bbox'], dtype=np.intp)
        center = np.average([bbox[:2], bbox[:2] + bbox[2:]], axis=0)
        # np.maximum(..., 1) keeps the upper bound above the lower bound
        # when a side truncates to zero.
        new_size = np.array(
            (
                np.random.randint(bbox[2], 2 * np.maximum(bbox[2], 1)),
                np.random.randint(bbox[3], 2 * np.maximum(bbox[3], 1)),
            ),
        )

        x1, y1 = np.maximum(center - new_size / 2, [0, 0]).astype(np.intp)
        x2, y2 = np.minimum(center + new_size / 2, image.size).astype(np.intp)

        img_w, img_h = image.size
        left, right = self._shrunk_span(x1, x2, img_w)
        upper, lower = self._shrunk_span(y1, y2, img_h)
        image = image.crop([left, upper, right, lower])

        return self.data_transforms[self.data_type](image), category_id

    @staticmethod
    def _shrunk_span(start: int, stop: int, limit: int):
        """Trim ``[start, stop)`` by one pixel per side without emptying it.

        When trimming would leave nothing, the untrimmed span is returned
        instead, clamped into ``[0, limit]`` and widened to one pixel.
        """
        lo, hi = int(start) + 1, int(stop) - 1
        if hi > lo:
            return lo, hi
        lo = min(max(int(start), 0), max(int(limit) - 1, 0))
        hi = min(max(int(stop), lo + 1), max(int(limit), lo + 1))
        return lo, hi

    def __len__(self):
        """Return the number of images in the wrapped dataset."""
        return len(self.dataset)
import torch
from torch import nn
from torchvision.datasets import CocoDetection

# datasets
trainset = CocoDetection(
    "/media/sinclair/datasets/COCO/train2017",
    "/media/sinclair/datasets/COCO/annotations/instances_train2017.json",
)
testset = CocoDetection(
    "/media/sinclair/datasets/COCO/val2017",
    "/media/sinclair/datasets/COCO/annotations/instances_val2017.json",
)

# dataloaders
# Look at one training sample. It is fetched once and unpacked, so the
# image is not looked up a second time just to get its annotations.
image, label = trainset[20]
image.show()
width, height = image.size

# Boxes are in pixel image coordinates, xywh.
print(width, height)
for obj in label:
    print(obj['bbox'])
    print(obj['category_id'])

# model

# optimizer

# training loop