Beispiel #1
0
class Coco2017Dataset(torch.utils.data.Dataset):
    """COCO 2017 detection data served as single-object classification crops.

    Wraps ``CocoDetection``. Plain indexing (``dataset[i]``) returns a
    randomly sized crop around one randomly chosen annotated object together
    with that object's ``category_id``. Calling ``__getitem__`` explicitly
    with ``use_cropped=False`` returns the whole image and a per-category
    label vector instead.
    """

    def __init__(
        self, root: str, data_type: str = 'train', num_class: int = 91, **kwargs
    ):
        """Open the requested COCO 2017 split.

        Args:
            root: Directory holding the ``train2017`` / ``val2017`` image
                folders and the ``annotations`` folder.
            data_type: ``'train'`` selects ``train2017``; any other value
                selects ``val2017``. The same value is later used as the key
                into the mapping returned by ``get_transforms()``.
            num_class: Length of the label vector built by the non-cropped
                path of ``__getitem__``.
            **kwargs: Accepted and ignored.
        """
        self.data_transforms = get_transforms()
        file_name = 'train2017' if data_type == 'train' else 'val2017'
        # No transform is handed to CocoDetection: the transform is applied
        # after cropping, in get_cropped_instance.
        self.dataset = CocoDetection(
            root=osp.join(root, file_name),
            annFile=osp.join(root, f'annotations/instances_{file_name}.json'),
        )
        # Images without annotations are not filtered out up front;
        # get_cropped_instance skips them lazily by re-sampling an index.

        self.data_type = data_type
        self.num_class = num_class
        # Seeds NumPy's *global* RNG so the random crops are reproducible.
        # This also affects every other user of np.random in the process.
        np.random.seed(256)

    def __getitem__(self, index: int, use_cropped: bool = True):
        """Return one sample.

        Args:
            index: Position in the underlying dataset.
            use_cropped: When true (the default, and what ``dataset[i]``
                uses) delegate to ``get_cropped_instance``.

        Returns:
            ``(transformed_crop, category_id)`` when ``use_cropped`` is true,
            otherwise ``(image, labels)`` where ``labels`` is an int64 tensor
            of length ``num_class``.
        """
        if use_cropped:
            return self.get_cropped_instance(index)

        image, targets = self.dataset[index]

        labels = torch.zeros(self.num_class, dtype=torch.int64)
        for target in targets:
            cat_id = target['category_id']
            # NOTE(review): the slot holds the category id itself rather than
            # 1, so this is not a plain multi-hot vector - confirm intent.
            labels[cat_id] = cat_id
        return image, labels

    @staticmethod
    def _trim_span(low: int, high: int, limit: int):
        """Shave one pixel off each end of ``[low, high)`` without emptying it.

        Spans wider than two pixels are trimmed on both sides. Narrower spans
        are returned untrimmed, and empty or inverted spans collapse to a
        single pixel clamped into ``[0, limit)``.
        """
        if high - low > 2:
            return low + 1, high - 1
        if high > low:
            return low, high
        low = max(0, min(low, limit - 1))
        return low, low + 1

    def get_cropped_instance(self, index: int):
        """Crop a random window around one random object of an image.

        If the image at ``index`` has no annotations, other indices are drawn
        at random until an annotated image is found. The window is centred on
        the chosen bounding box, each side is drawn from ``[side, 2 * side)``
        and the window is clipped to the image.

        Returns:
            ``(transformed_crop, category_id)``.
        """
        image, targets = self.dataset[index]
        while not targets:
            # NOTE: never terminates if no sample has any annotation.
            index = np.random.randint(0, len(self.dataset))
            image, targets = self.dataset[index]

        target = targets[np.random.randint(0, len(targets))]
        category_id = target['category_id']

        # COCO boxes are [x, y, width, height]; fractions are truncated here.
        bbox = np.array(target['bbox'], dtype=np.intp)
        center = bbox[:2] + bbox[2:] / 2.0

        # max(side, 1) keeps the randint range non-empty for zero-sized boxes.
        new_size = np.array(
            [
                np.random.randint(side, 2 * np.maximum(side, 1))
                for side in bbox[2:]
            ]
        )

        x1, y1 = np.maximum(center - new_size / 2, [0, 0]).astype(np.intp)
        x2, y2 = np.minimum(center + new_size / 2, image.size).astype(np.intp)

        # Unconditionally trimming one pixel per side turned small windows
        # into empty or inverted crop boxes; _trim_span keeps them non-empty.
        img_w, img_h = image.size
        left, right = self._trim_span(int(x1), int(x2), img_w)
        upper, lower = self._trim_span(int(y1), int(y2), img_h)
        image = image.crop((left, upper, right, lower))

        return self.data_transforms[self.data_type](image), category_id

    def __len__(self):
        """Return the number of images in the underlying dataset."""
        return len(self.dataset)
Beispiel #2
0
import torch
from torch import nn
from torchvision.datasets import CocoDetection

# datasets

# COCO 2017 splits read from hard-coded local paths (image folder first,
# then the matching instances annotation file).
trainset = CocoDetection(
    "/media/sinclair/datasets/COCO/train2017",
    "/media/sinclair/datasets/COCO/annotations/instances_train2017.json",
)

testset = CocoDetection(
    "/media/sinclair/datasets/COCO/val2017",
    "/media/sinclair/datasets/COCO/annotations/instances_val2017.json",
)

# dataloaders

# Fetch sample 20 once and unpack it, instead of looking it up twice just to
# take element [0] and then element [1].
image, label = trainset[20]

image.show()
width, height = image.size

# Bounding boxes are in pixel image coordinates, xywh.
print(width, height)
for obj in label:
    print(obj['bbox'])
    print(obj['category_id'])
# model

# optimizer

# training loop