Ejemplo n.º 1
0
def coco_2017_yolo(
    root: str = default_dataset_path("coco-detection"),
    train: bool = False,
    rand_trans: bool = False,
    download: bool = True,
    year: str = "2017",
    image_size: int = 640,
    preprocessing_type: str = "yolo",
):
    """
    Wrapper for COCO detection dataset with Dataset Registry values properly
    created for a Yolo model trained on 80 classes.

    :param root: The root folder to find the dataset at, if not found will
        download here if download=True
    :param train: True if this is for the training distribution,
        False for the validation
    :param rand_trans: True to enable the random augmentations of the
        underlying CocoDetectionDataset (forwarded unchanged), False otherwise
    :param download: True to download the dataset, False otherwise.
    :param year: Only valid option is 2017. default is 2017.
    :param image_size: the size of the image to output from the dataset
    :param preprocessing_type: Type of standard pre-processing to perform.
        Only valid option is 'yolo'. Default is 'yolo'
    :return: a CocoDetectionDataset constructed with 'yolo' preprocessing
    :raises ValueError: if preprocessing_type is not 'yolo' or year is not 2017
    """
    if preprocessing_type != "yolo":
        # str.format (previously misspelled) so the intended ValueError is
        # raised rather than an AttributeError while building the message
        raise ValueError(
            "Only valid preprocessing type for Coco 2017 Yolo dataset is 'yolo'"
            " received: {}".format(preprocessing_type))
    if int(year) != 2017:
        raise ValueError(
            "Only valid year type for Coco 2017 Yolo dataset is 2017"
            " received: {}".format(year))
    return CocoDetectionDataset(root, train, rand_trans, download, year,
                                image_size, "yolo")
Ejemplo n.º 2
0
    def __init__(
        self,
        root: str = default_dataset_path(
            "imagenet"),  # default to imagenet location
        train: bool = True,
        rand_trans: bool = False,
        image_size: int = 224,
    ):
        """
        Build the image transform pipeline and initialize the parent dataset
        from the "train" or "val" sub-folder of root.

        :param root: base folder of the dataset; cleaned with clean_path and
            made absolute before the split folder name is appended
        :param train: True to read from the "train" sub-folder (and shuffle
            the loaded samples), False to read from "val"
        :param rand_trans: True to use RandomResizedCrop and
            RandomHorizontalFlip, False to use Resize followed by CenterCrop
        :param image_size: size passed to the crop transforms
        :raises: torchvision_import_error when it is set
        """
        if torchvision_import_error is not None:
            raise torchvision_import_error

        cleaned_root = clean_path(root)

        if rand_trans:
            pipeline = [
                transforms.RandomResizedCrop(image_size),
                transforms.RandomHorizontalFlip(),
            ]
        else:
            # resize slightly larger than the crop, using the 256/224 ratio
            upscale = 256.0 / 224.0
            pipeline = [
                transforms.Resize(round(upscale * image_size)),
                transforms.CenterCrop(image_size),
            ]
        pipeline.append(transforms.ToTensor())
        pipeline.append(
            transforms.Normalize(mean=IMAGENET_RGB_MEANS,
                                 std=IMAGENET_RGB_STDS))

        split_folder = "train" if train else "val"
        absolute_root = os.path.abspath(os.path.expanduser(cleaned_root))
        super().__init__(
            os.path.join(absolute_root, split_folder),
            transform=transforms.Compose(pipeline),
        )

        if not train:
            return
        # make sure we don't preserve the folder structure class order
        random.shuffle(self.samples)
Ejemplo n.º 3
0
    def __init__(
        self,
        root: str = default_dataset_path("imagenet"),
        train: bool = True,
        rand_trans: bool = False,
        image_size: Union[None, int, Tuple[int, int]] = 224,
        pre_resize_transforms=SplitsTransforms(
            train=(
                random_scaling_crop(),
                tf.image.random_flip_left_right,
            ),
            val=(imagenet_pre_resize_processor(), ),
        ),
        post_resize_transforms=SplitsTransforms(
            train=(torch_imagenet_normalizer(), ),
            val=(torch_imagenet_normalizer(), )),
    ):
        """
        Initialize the parent dataset with a cleaned root path and the given
        per-split transforms, shuffling the samples for the training split.

        :param root: folder of the dataset; passed through clean_path
        :param train: forwarded to the parent; when True the loaded samples
            are shuffled in place
        :param rand_trans: accepted but not read by this constructor
        :param image_size: forwarded to the parent as image_size
        :param pre_resize_transforms: forwarded to the parent unchanged
        :param post_resize_transforms: forwarded to the parent unchanged
        """
        dataset_root = clean_path(root)
        parent_kwargs = {
            "image_size": image_size,
            "pre_resize_transforms": pre_resize_transforms,
            "post_resize_transforms": post_resize_transforms,
        }
        super().__init__(dataset_root, train, **parent_kwargs)

        if not train:
            return
        # make sure we don't preserve the folder structure class order
        random.shuffle(self.samples)
Ejemplo n.º 4
0
 def __init__(
     self,
     root: str = default_dataset_path("imagenet"),
     train: bool = True,
     image_size: int = 224,
 ):
     """
     Initialize the dataset by forwarding all arguments to the parent class.

     :param root: dataset folder, defaults to the registered "imagenet" path
     :param train: forwarded positionally to the parent constructor
     :param image_size: forwarded positionally to the parent constructor
     """
     super().__init__(root, train, image_size)
Ejemplo n.º 5
0
    def __init__(
        self,
        root: str = default_dataset_path("cifar10"),
        train: bool = True,
        rand_trans: bool = False,
    ):
        """
        Compose the 32x32 image transforms and initialize the parent dataset.

        :param root: dataset folder, forwarded to the parent constructor
        :param train: forwarded to the parent constructor
        :param rand_trans: True to use RandomResizedCrop and
            RandomHorizontalFlip, False to use Resize and CenterCrop
        :raises: torchvision_import_error when it is set
        """
        if torchvision_import_error is not None:
            raise torchvision_import_error

        spatial_steps = (
            [
                transforms.RandomResizedCrop(32),
                transforms.RandomHorizontalFlip(),
            ]
            if rand_trans
            else [transforms.Resize(32), transforms.CenterCrop(32)]
        )
        tensor_steps = [
            transforms.ToTensor(),
            transforms.Normalize(mean=_CIFAR10_RGB_MEANS, std=_CIFAR10_RGB_STDS),
        ]
        pipeline = transforms.Compose(spatial_steps + tensor_steps)

        # trailing positionals are presumably target_transform=None and
        # download=True for the parent dataset -- confirm against its signature
        super().__init__(root, train, pipeline, None, True)
Ejemplo n.º 6
0
    def __init__(
        self,
        root: str = default_dataset_path("voc-segmentation"),
        train: bool = True,
        rand_trans: bool = False,
        download: bool = True,
        year: str = "2012",
        image_size: int = 300,
    ):
        """
        Compose the image transforms and initialize the parent VOC dataset.

        :param root: dataset folder; expanded and made absolute
        :param train: True to use the "train" image set, False for "val"
        :param rand_trans: True to add RandomHorizontalFlip after the resize
        :param download: forwarded to the parent constructor
        :param year: forwarded to the parent constructor
        :param image_size: images are resized to (image_size, image_size)
        :raises ValueError: when VOCSegmentation is the `object` placeholder
        :raises: torchvision_import_error when it is set
        """
        if torchvision_import_error is not None:
            raise torchvision_import_error
        if VOCSegmentation is object:
            raise ValueError(
                "VOC is unsupported on this torchvision version, please upgrade to use"
            )

        root = os.path.abspath(os.path.expanduser(root))

        steps = [transforms.Resize((image_size, image_size))]
        if rand_trans:
            steps.append(transforms.RandomHorizontalFlip())
        steps += [
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGENET_RGB_MEANS,
                                 std=IMAGENET_RGB_STDS),
        ]

        image_set = "train" if train else "val"
        super().__init__(
            root,
            year=year,
            image_set=image_set,
            download=download,
            transform=transforms.Compose(steps),
        )
Ejemplo n.º 7
0
 def __init__(
     self,
     root: str = default_dataset_path("cifar100"),
     train: bool = True,
     rand_trans: bool = False,
 ):
     """
     Compose the image transforms and initialize the parent dataset.

     :param root: dataset folder, forwarded to the parent constructor
     :param train: forwarded to the parent constructor
     :param rand_trans: True to prepend RandomCrop(32, padding=4) and
         RandomHorizontalFlip to the pipeline, False for no augmentation
     """
     normalize = transforms.Normalize(mean=_CIFAR100_RGB_MEANS,
                                      std=_CIFAR100_RGB_STDS)
     pipeline = []
     if rand_trans:
         pipeline.append(transforms.RandomCrop(32, padding=4))
         pipeline.append(transforms.RandomHorizontalFlip())
     pipeline.append(transforms.ToTensor())
     pipeline.append(normalize)

     # trailing positionals are presumably target_transform=None and
     # download=True for the parent dataset -- confirm against its signature
     super().__init__(root, train, transforms.Compose(pipeline), None, True)
Ejemplo n.º 8
0
    def __init__(
        self,
        root: str = default_dataset_path("mnist"),
        train: bool = True,
        flatten: bool = False,
    ):
        """
        Initialize the parent dataset with a ToTensor + Normalize transform
        and remember the flatten flag.

        :param root: dataset folder, forwarded to the parent constructor
        :param train: forwarded to the parent constructor
        :param flatten: stored on the instance as _flatten
        :raises: torchvision_import_error when it is set
        """
        if torchvision_import_error is not None:
            raise torchvision_import_error

        to_tensor = transforms.ToTensor()
        # Normalize with mean 0.5 and std 1.0 on the single channel
        shift = transforms.Normalize((0.5, ), (1.0, ))
        pipeline = transforms.Compose([to_tensor, shift])

        super().__init__(root, train, pipeline, None, True)
        self._flatten = flatten
Ejemplo n.º 9
0
    def __init__(
        self,
        root: str = default_dataset_path("imagenette"),
        train: bool = True,
        dataset_size: ImagenetteSize = ImagenetteSize.s160,
        image_size: Union[int, None] = None,
        download: bool = True,
    ):
        """
        Run the ImagenetteDownloader initializer, then initialize the parent
        dataset from the extracted root.

        :param root: folder handed to ImagenetteDownloader
        :param train: stored as _train and forwarded to the parent
        :param dataset_size: Imagenette variant handed to the downloader
        :param image_size: output size; when None it becomes 160 for
            ImagenetteSize.s160 and 224 for any other dataset_size
        :param download: forwarded to ImagenetteDownloader
        """
        ImagenetteDownloader.__init__(self, root, dataset_size, download)
        self._train = train

        if image_size is None:
            if dataset_size == ImagenetteSize.s160:
                image_size = 160
            else:
                image_size = 224

        super().__init__(self.extracted_root, train, image_size)
Ejemplo n.º 10
0
    def __init__(
        self,
        root: str = default_dataset_path("imagenette"),
        train: bool = True,
        rand_trans: bool = False,
        dataset_size: ImagenetteSize = ImagenetteSize.s160,
        image_size: Union[int, None] = None,
        download: bool = True,
    ):
        """
        Run the ImagenetteDownloader initializer, compose the image
        transforms, and initialize ImageFolder on the requested split.

        :param root: folder handed to ImagenetteDownloader
        :param train: selects the split folder through self.split_root
        :param rand_trans: True to use RandomResizedCrop and
            RandomHorizontalFlip, False to use Resize followed by CenterCrop
        :param dataset_size: Imagenette variant handed to the downloader
        :param image_size: crop size; when None it becomes 160 for
            ImagenetteSize.s160 and 224 for any other dataset_size
        :param download: forwarded to ImagenetteDownloader
        :raises: torchvision_import_error when it is set
        """
        if torchvision_import_error is not None:
            raise torchvision_import_error

        ImagenetteDownloader.__init__(self, root, dataset_size, download)

        if image_size is None:
            if dataset_size == ImagenetteSize.s160:
                image_size = 160
            else:
                image_size = 224

        if rand_trans:
            spatial_steps = [
                transforms.RandomResizedCrop(image_size),
                transforms.RandomHorizontalFlip(),
            ]
        else:
            # resize slightly larger than the crop, using the 256/224 ratio
            upscale = 256.0 / 224.0
            spatial_steps = [
                transforms.Resize(round(upscale * image_size)),
                transforms.CenterCrop(image_size),
            ]

        pipeline = spatial_steps + [
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGENET_RGB_MEANS,
                                 std=IMAGENET_RGB_STDS),
        ]

        split_folder = self.split_root(train)
        ImageFolder.__init__(self, split_folder, transforms.Compose(pipeline))

        # make sure we don't preserve the folder structure class order
        random.shuffle(self.samples)
Ejemplo n.º 11
0
    def __init__(
        self,
        root: str = default_dataset_path("coco-detection"),
        train: bool = False,
        rand_trans: bool = False,
        download: bool = True,
        year: str = "2017",
        image_size: int = 300,
        preprocessing_type: str = None,
        default_boxes: DefaultBoxes = None,
    ):
        """
        Locate (optionally downloading) the COCO images and annotations for
        the requested split, build the image/annotation transform pipeline,
        and initialize the parent dataset with it.

        :param root: base folder of the dataset; the year is appended as a
            sub-folder after the path is expanded and made absolute
        :param train: True to use the train{year} images and annotations,
            False to use val{year}
        :param rand_trans: True to add SSD random crop, random horizontal
            flip, and color jitter to the pipeline
        :param download: True to download and unzip the image and annotation
            archives when the split's image folder does not exist
        :param year: dataset year used in folder names, file names and URLs
        :param image_size: images are resized to (image_size, image_size)
        :param preprocessing_type: one of None, "yolo" or "ssd"; "yolo" skips
            image normalization and adds yolo formatted annotations, "ssd"
            encodes annotations with the default boxes
        :param default_boxes: default boxes used for "ssd" encoding; when
            falsy, get_default_boxes_300() is used. Stored as _default_boxes
        :raises ValueError: if pycocotools is None, if preprocessing_type is
            not supported, or if no download happens and root does not exist
        :raises: torchvision_import_error when it is set
        """
        if torchvision_import_error is not None:
            raise torchvision_import_error
        if pycocotools is None:
            raise ValueError(
                "pycocotools is not installed, please install to use")

        if preprocessing_type not in [None, "yolo", "ssd"]:
            raise ValueError(
                "preprocessing type {} not supported, valid values are: {}".
                format(preprocessing_type, [None, "yolo", "ssd"]))

        # all data for a given year lives under <root>/<year>
        root = os.path.join(os.path.abspath(os.path.expanduser(root)),
                            str(year))
        if train:
            data_path = "{root}/train{year}".format(root=root, year=year)
            annotation_path = "{root}/annotations/instances_train{year}.json".format(
                root=root, year=year)
        else:
            data_path = "{root}/val{year}".format(root=root, year=year)
            annotation_path = "{root}/annotations/instances_val{year}.json".format(
                root=root, year=year)

        # download only when the split's image folder is missing; the
        # annotation archive is fetched in the same pass
        if not os.path.isdir(data_path) and download:
            dataset_type = "train" if train else "val"
            zip_url = "{COCO_IMAGE_ZIP_ROOT}/{dataset_type}{year}.zip".format(
                COCO_IMAGE_ZIP_ROOT=COCO_IMAGE_ZIP_ROOT,
                dataset_type=dataset_type,
                year=year,
            )
            zip_path = os.path.join(root, "images.zip")
            annotation_url = (
                "{COCO_ANNOTATION_ZIP_ROOT}/annotations_trainval{year}.zip".
                format(COCO_ANNOTATION_ZIP_ROOT=COCO_ANNOTATION_ZIP_ROOT,
                       year=year))
            annotation_zip_path = os.path.join(root, "annotation.zip")
            os.makedirs(root, exist_ok=True)
            print("Downloading coco dataset")

            # NOTE(review): the downloaded zip archives are not removed here
            print("Downloading image files...")
            request.urlretrieve(zip_url, zip_path)
            print("Unzipping image files...")
            with zipfile.ZipFile(zip_path, "r") as zip_ref:
                zip_ref.extractall(root)

            print("Downloading annotations files...")
            request.urlretrieve(annotation_url, annotation_zip_path)
            print("Unzipping annotation files...")
            with zipfile.ZipFile(annotation_zip_path, "r") as zip_ref:
                zip_ref.extractall(root)

        # NOTE(review): this branch checks root rather than data_path, so an
        # existing root with a missing split folder is not reported here
        elif not os.path.isdir(root):
            raise ValueError(
                f"Coco Dataset Path {root} does not exist. Please download dataset."
            )
        yolo_preprocess = preprocessing_type == "yolo"
        # each pipeline entry takes (img, ann) and returns the updated pair
        trans = [
            # process annotations
            lambda img, ann: (
                img,
                _extract_bounding_box_and_labels(img, ann, yolo_preprocess),
            ),
        ]
        if rand_trans:
            # add random crop, flip, and jitter to pipeline
            jitter_fn = ColorJitter(brightness=0.125,
                                    contrast=0.5,
                                    saturation=0.5,
                                    hue=0.05)
            trans.extend([
                # Random cropping as implemented in SSD paper
                ssd_random_crop_image_and_annotations,
                # random horizontal flip
                random_horizontal_flip_image_and_annotations,
                # image color jitter
                lambda img, ann: (jitter_fn(img), ann),
            ])
        trans.extend([
            # resize image
            lambda img, ann: (
                torch_functional.resize(img, (image_size, image_size)),
                ann,
            ),
            # Convert image to tensor
            lambda img, ann: (torch_functional.to_tensor(img), ann),
        ])
        # Normalize image except for yolo preprocessing
        if not yolo_preprocess:
            trans.append(lambda img, ann: (
                torch_functional.normalize(img, IMAGENET_RGB_MEANS,
                                           IMAGENET_RGB_STDS),
                ann,
            ))
        if preprocessing_type == "ssd":
            # fall back to the 300px default boxes when none were supplied
            default_boxes = default_boxes or get_default_boxes_300()
            # encode the bounding boxes and labels with the default boxes
            trans.append(lambda img, ann: (
                img,
                (
                    *default_boxes.encode_image_box_labels(*ann),
                    ann,
                ),  # encoded_boxes, encoded_labels, original_annotations
            ))
        elif yolo_preprocess:
            # yolo formatted annotations are paired with the originals
            trans.append(lambda img, ann: (
                img,
                (bounding_box_and_labels_to_yolo_fmt(ann), ann),
            ))

        super().__init__(
            root=data_path,
            annFile=annotation_path,
            transforms=AnnotatedImageTransforms(trans),
        )
        # may still be None when preprocessing_type is not "ssd"
        self._default_boxes = default_boxes
Ejemplo n.º 12
0
    def __init__(
        self,
        root: str = default_dataset_path("voc-detection"),
        train: bool = True,
        rand_trans: bool = False,
        download: bool = True,
        year: str = "2012",
        image_size: int = 300,
        preprocessing_type: str = None,
        default_boxes: DefaultBoxes = None,
    ):
        """
        Build the image/annotation transform pipeline and initialize the
        parent VOC detection dataset with it.

        :param root: dataset folder; expanded and made absolute
        :param train: True to use the "train" image set, False for "val"
        :param rand_trans: True to add SSD random crop, random horizontal
            flip, and color jitter to the pipeline
        :param download: forwarded to the parent constructor
        :param year: forwarded to the parent constructor
        :param image_size: images are resized to (image_size, image_size)
        :param preprocessing_type: one of None, "yolo" or "ssd"; "yolo" skips
            image normalization and adds yolo formatted annotations, "ssd"
            encodes annotations with the default boxes
        :param default_boxes: default boxes used for "ssd" encoding; when
            falsy, get_default_boxes_300(voc=True) is used. Stored as
            _default_boxes
        :raises ValueError: when VOCDetection is the `object` placeholder or
            preprocessing_type is not supported
        :raises: torchvision_import_error when it is set
        """
        if torchvision_import_error is not None:
            raise torchvision_import_error
        if VOCDetection == object:
            raise ValueError(
                "VOC is unsupported on this torchvision version, please upgrade to use"
            )
        supported_types = [None, "yolo", "ssd"]
        if preprocessing_type not in supported_types:
            raise ValueError(
                "preprocessing type {} not supported, valid values are: {}".
                format(preprocessing_type, supported_types))

        root = os.path.abspath(os.path.expanduser(root))
        use_yolo = preprocessing_type == "yolo"
        use_ssd = preprocessing_type == "ssd"

        # every pipeline step maps an (image, annotation) pair to a new pair
        def extract_annotations(img, ann):
            return img, _extract_bounding_box_and_labels(img, ann)

        def resize_image(img, ann):
            return F.resize(img, (image_size, image_size)), ann

        def image_to_tensor(img, ann):
            return F.to_tensor(img), ann

        def normalize_image(img, ann):
            return F.normalize(img, IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS), ann

        def encode_ssd(img, ann):
            # encoded_boxes, encoded_labels, original_annotations
            return img, (*default_boxes.encode_image_box_labels(*ann), ann)

        def encode_yolo(img, ann):
            return img, (bounding_box_and_labels_to_yolo_fmt(ann), ann)

        pipeline = [extract_annotations]
        if rand_trans:
            # add random crop, flip, and jitter to pipeline
            jitter_fn = ColorJitter(brightness=0.125,
                                    contrast=0.5,
                                    saturation=0.5,
                                    hue=0.05)

            def jitter_image(img, ann):
                return jitter_fn(img), ann

            pipeline += [
                # Random cropping as implemented in SSD paper
                ssd_random_crop_image_and_annotations,
                random_horizontal_flip_image_and_annotations,
                jitter_image,
            ]
        pipeline += [resize_image, image_to_tensor]

        # Normalize image except for yolo preprocessing
        if not use_yolo:
            pipeline.append(normalize_image)

        if use_ssd:
            if not default_boxes:
                default_boxes = get_default_boxes_300(voc=True)
            # encode the bounding boxes and labels with the default boxes
            pipeline.append(encode_ssd)
        elif use_yolo:
            pipeline.append(encode_yolo)

        image_set = "train" if train else "val"
        super().__init__(
            root,
            year=year,
            image_set=image_set,
            download=download,
            transforms=AnnotatedImageTransforms(pipeline),
        )
        self._default_boxes = default_boxes