def coco_2017_yolo(
    root: str = default_dataset_path("coco-detection"),
    train: bool = False,
    rand_trans: bool = False,
    download: bool = True,
    year: str = "2017",
    image_size: int = 640,
    preprocessing_type: str = "yolo",
):
    """
    Wrapper for COCO detection dataset with Dataset Registry values properly
    created for a Yolo model trained on 80 classes.

    :param root: The root folder to find the dataset at,
        if not found will download here if download=True
    :param train: True if this is for the training distribution,
        False for the validation
    :param rand_trans: forwarded to CocoDetectionDataset; True to enable its
        random augmentation transforms, False otherwise
    :param download: True to download the dataset, False otherwise.
    :param year: Only valid option is 2017. default is 2017.
    :param image_size: the size of the image to output from the dataset
    :param preprocessing_type: Type of standard pre-processing to perform.
        Only valid option is 'yolo'. Default is 'yolo'
    :return: a CocoDetectionDataset constructed with 'yolo' preprocessing
    :raises ValueError: if preprocessing_type is not 'yolo' or year is not 2017
    """
    if preprocessing_type != "yolo":
        # NOTE: previously spelled `.foramt`, which raised AttributeError
        # instead of the intended ValueError
        raise ValueError(
            "Only valid preprocessing type for Coco 2017 Yolo dataset is 'yolo'"
            " received: {}".format(preprocessing_type)
        )

    if int(year) != 2017:
        raise ValueError(
            "Only valid year type for Coco 2017 Yolo dataset is 2017"
            " received: {}".format(year)
        )

    return CocoDetectionDataset(
        root, train, rand_trans, download, year, image_size, "yolo"
    )
def __init__(
    self,
    root: str = default_dataset_path("imagenet"),  # default to imagenet location
    train: bool = True,
    rand_trans: bool = False,
    image_size: int = 224,
):
    """
    Build an image-folder dataset over the "train" or "val" sub-directory
    of root with an ImageNet-normalized transform pipeline.

    :param root: folder containing the "train" and "val" sub-directories
    :param train: True to read from "train", False to read from "val";
        when True the loaded samples are additionally shuffled in place
    :param rand_trans: True to use RandomResizedCrop + RandomHorizontalFlip,
        False to use Resize + CenterCrop
    :param image_size: the side length passed to the crop transforms
    """
    if torchvision_import_error is not None:
        raise torchvision_import_error

    root = clean_path(root)

    if rand_trans:
        pipeline = [
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
        ]
    else:
        # standard used: resize to 256/224 of the target before center cropping
        resize_ratio = 256.0 / 224.0
        pipeline = [
            transforms.Resize(round(resize_ratio * image_size)),
            transforms.CenterCrop(image_size),
        ]
    pipeline.append(transforms.ToTensor())
    pipeline.append(
        transforms.Normalize(mean=IMAGENET_RGB_MEANS, std=IMAGENET_RGB_STDS)
    )

    split_dir = "train" if train else "val"
    root = os.path.join(os.path.abspath(os.path.expanduser(root)), split_dir)
    super().__init__(root, transform=transforms.Compose(pipeline))

    if train:
        # make sure we dont preserve the folder structure class order
        random.shuffle(self.samples)
def __init__(
    self,
    root: str = default_dataset_path("imagenet"),
    train: bool = True,
    rand_trans: bool = False,
    image_size: Union[None, int, Tuple[int, int]] = 224,
    pre_resize_transforms=SplitsTransforms(
        train=(
            random_scaling_crop(),
            tf.image.random_flip_left_right,
        ),
        val=(imagenet_pre_resize_processor(),),
    ),
    post_resize_transforms=SplitsTransforms(
        train=(torch_imagenet_normalizer(),),
        val=(torch_imagenet_normalizer(),),
    ),
):
    """
    Initialize the parent dataset with a cleaned root path and the given
    per-split transforms, shuffling the samples for the training split.

    :param root: dataset location; passed through clean_path before use
    :param train: forwarded to the parent; when True, self.samples is
        shuffled in place after the parent is initialized
    :param rand_trans: accepted but not read anywhere in this constructor
    :param image_size: forwarded to the parent as image_size
    :param pre_resize_transforms: SplitsTransforms forwarded to the parent;
        defaults to random_scaling_crop() + tf.image.random_flip_left_right
        for train and imagenet_pre_resize_processor() for val
    :param post_resize_transforms: SplitsTransforms forwarded to the parent;
        defaults to torch_imagenet_normalizer() for both splits
    """
    resize_settings = dict(
        image_size=image_size,
        pre_resize_transforms=pre_resize_transforms,
        post_resize_transforms=post_resize_transforms,
    )
    super().__init__(clean_path(root), train, **resize_settings)

    if not train:
        return

    # make sure we don't preserve the folder structure class order
    random.shuffle(self.samples)
def __init__(
    self,
    root: str = default_dataset_path("imagenet"),
    train: bool = True,
    image_size: int = 224,
):
    """
    Forward the arguments positionally, unchanged, to the parent constructor.

    :param root: dataset location handed to the parent
    :param train: split selector handed to the parent
    :param image_size: image size handed to the parent
    """
    parent_args = (root, train, image_size)
    super().__init__(*parent_args)
def __init__(
    self,
    root: str = default_dataset_path("cifar10"),
    train: bool = True,
    rand_trans: bool = False,
):
    """
    Initialize the parent dataset with a 32x32 transform pipeline
    normalized by the CIFAR-10 channel statistics.

    :param root: dataset location handed to the parent
    :param train: split selector handed to the parent
    :param rand_trans: True to use RandomResizedCrop(32) +
        RandomHorizontalFlip, False to use Resize(32) + CenterCrop(32)
    """
    if torchvision_import_error is not None:
        raise torchvision_import_error

    spatial_steps = (
        [transforms.RandomResizedCrop(32), transforms.RandomHorizontalFlip()]
        if rand_trans
        else [transforms.Resize(32), transforms.CenterCrop(32)]
    )
    pipeline = transforms.Compose(
        [
            *spatial_steps,
            transforms.ToTensor(),
            transforms.Normalize(mean=_CIFAR10_RGB_MEANS, std=_CIFAR10_RGB_STDS),
        ]
    )

    # NOTE(review): the trailing positional None / True are presumably the
    # parent's target_transform and download arguments; confirm against the base
    super().__init__(root, train, pipeline, None, True)
def __init__(
    self,
    root: str = default_dataset_path("voc-segmentation"),
    train: bool = True,
    rand_trans: bool = False,
    download: bool = True,
    year: str = "2012",
    image_size: int = 300,
):
    """
    Initialize the parent VOC dataset with a square-resize,
    ImageNet-normalized image transform.

    :param root: dataset location; user-expanded and made absolute
    :param train: True selects image_set "train", False selects "val"
    :param rand_trans: True to add RandomHorizontalFlip after the resize
    :param download: forwarded to the parent as download
    :param year: forwarded to the parent as year
    :param image_size: images are resized to (image_size, image_size)
    :raises ValueError: if VOCSegmentation is the `object` placeholder
    """
    if torchvision_import_error is not None:
        raise torchvision_import_error
    if VOCSegmentation is object:
        raise ValueError(
            "VOC is unsupported on this torchvision version, please upgrade to use"
        )

    root = os.path.abspath(os.path.expanduser(root))

    steps = [transforms.Resize((image_size, image_size))]
    if rand_trans:
        steps.append(transforms.RandomHorizontalFlip())
    steps.append(transforms.ToTensor())
    steps.append(
        transforms.Normalize(mean=IMAGENET_RGB_MEANS, std=IMAGENET_RGB_STDS)
    )

    image_set = "train" if train else "val"
    super().__init__(
        root,
        year=year,
        image_set=image_set,
        download=download,
        transform=transforms.Compose(steps),
    )
def __init__(
    self,
    root: str = default_dataset_path("cifar100"),
    train: bool = True,
    rand_trans: bool = False,
):
    """
    Initialize the parent dataset with a transform pipeline normalized by
    the CIFAR-100 channel statistics.

    :param root: dataset location handed to the parent
    :param train: split selector handed to the parent
    :param rand_trans: True to prepend RandomCrop(32, padding=4) and
        RandomHorizontalFlip, False to only convert and normalize
    """
    normalize = transforms.Normalize(
        mean=_CIFAR100_RGB_MEANS, std=_CIFAR100_RGB_STDS
    )

    steps = []
    if rand_trans:
        steps.append(transforms.RandomCrop(32, padding=4))
        steps.append(transforms.RandomHorizontalFlip())
    steps += [transforms.ToTensor(), normalize]

    # NOTE(review): the trailing positional None / True are presumably the
    # parent's target_transform and download arguments; confirm against the base
    super().__init__(root, train, transforms.Compose(steps), None, True)
def __init__(
    self,
    root: str = default_dataset_path("mnist"),
    train: bool = True,
    flatten: bool = False,
):
    """
    Initialize the parent dataset with a ToTensor + Normalize transform and
    remember whether samples should be flattened.

    :param root: dataset location handed to the parent
    :param train: split selector handed to the parent
    :param flatten: stored on the instance as self._flatten
    """
    if torchvision_import_error is not None:
        raise torchvision_import_error

    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.5,), (1.0,))
    pipeline = transforms.Compose([to_tensor, normalize])

    # NOTE(review): the trailing positional None / True are presumably the
    # parent's target_transform and download arguments; confirm against the base
    super().__init__(root, train, pipeline, None, True)
    self._flatten = flatten
def __init__(
    self,
    root: str = default_dataset_path("imagenette"),
    train: bool = True,
    dataset_size: ImagenetteSize = ImagenetteSize.s160,
    image_size: Union[int, None] = None,
    download: bool = True,
):
    """
    Run the Imagenette downloader initializer, then initialize the parent
    dataset from the extracted root.

    :param root: location handed to ImagenetteDownloader
    :param train: stored as self._train and forwarded to the parent
    :param dataset_size: the ImagenetteSize variant handed to the downloader
    :param image_size: image size forwarded to the parent; when None it
        resolves to 160 for ImagenetteSize.s160 and 224 otherwise
    :param download: forwarded to ImagenetteDownloader
    """
    # the downloader is initialized explicitly before the parent dataset,
    # which is then pointed at self.extracted_root
    ImagenetteDownloader.__init__(self, root, dataset_size, download)
    self._train = train

    if image_size is None:
        if dataset_size == ImagenetteSize.s160:
            image_size = 160
        else:
            image_size = 224

    super().__init__(self.extracted_root, train, image_size)
def __init__(
    self,
    root: str = default_dataset_path("imagenette"),
    train: bool = True,
    rand_trans: bool = False,
    dataset_size: ImagenetteSize = ImagenetteSize.s160,
    image_size: Union[int, None] = None,
    download: bool = True,
):
    """
    Run the Imagenette downloader initializer, then initialize ImageFolder
    over the requested split with an ImageNet-normalized transform pipeline.

    :param root: location handed to ImagenetteDownloader
    :param train: split selector passed to self.split_root
    :param rand_trans: True to use RandomResizedCrop + RandomHorizontalFlip,
        False to use Resize + CenterCrop
    :param dataset_size: the ImagenetteSize variant handed to the downloader
    :param image_size: crop size; when None it resolves to 160 for
        ImagenetteSize.s160 and 224 otherwise
    :param download: forwarded to ImagenetteDownloader
    """
    if torchvision_import_error is not None:
        raise torchvision_import_error

    ImagenetteDownloader.__init__(self, root, dataset_size, download)

    if image_size is None:
        if dataset_size == ImagenetteSize.s160:
            image_size = 160
        else:
            image_size = 224

    if rand_trans:
        spatial_steps = [
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
        ]
    else:
        # standard used: resize to 256/224 of the target before center cropping
        resize_ratio = 256.0 / 224.0
        spatial_steps = [
            transforms.Resize(round(resize_ratio * image_size)),
            transforms.CenterCrop(image_size),
        ]

    pipeline = transforms.Compose(
        [
            *spatial_steps,
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGENET_RGB_MEANS, std=IMAGENET_RGB_STDS),
        ]
    )
    ImageFolder.__init__(self, self.split_root(train), pipeline)

    # make sure we don't preserve the folder structure class order
    # (done for both splits, not only training)
    random.shuffle(self.samples)
def __init__(
    self,
    root: str = default_dataset_path("coco-detection"),
    train: bool = False,
    rand_trans: bool = False,
    download: bool = True,
    year: str = "2017",
    image_size: int = 300,
    preprocessing_type: str = None,
    default_boxes: DefaultBoxes = None,
):
    """
    Locate (optionally downloading) a COCO split under root/<year> and
    initialize the parent dataset with an annotated-image transform pipeline.

    :param root: base folder; the dataset lives in root/<year>
    :param train: True to use the train split, False to use the val split
    :param rand_trans: True to add SSD random crop, random horizontal flip
        and color jitter to the pipeline
    :param download: True to download and unzip the image and annotation
        archives when the split's image folder is missing
    :param year: dataset year used in folder, file and archive names
    :param image_size: images are resized to (image_size, image_size)
    :param preprocessing_type: one of None, "yolo" or "ssd"; "yolo" skips
        normalization and appends yolo-formatted labels, "ssd" appends labels
        encoded with the default boxes
    :param default_boxes: boxes used for "ssd" encoding; when falsy and
        preprocessing_type is "ssd", get_default_boxes_300() is used
    :raises ValueError: if pycocotools is missing, preprocessing_type is not
        supported, or nothing is downloaded and root/<year> does not exist
    """
    if torchvision_import_error is not None:
        raise torchvision_import_error
    if pycocotools is None:
        raise ValueError("pycocotools is not installed, please install to use")

    supported_preprocessing = [None, "yolo", "ssd"]
    if preprocessing_type not in supported_preprocessing:
        raise ValueError(
            "preprocessing type {} not supported, valid values are: {}".format(
                preprocessing_type, supported_preprocessing
            )
        )

    root = os.path.join(os.path.abspath(os.path.expanduser(root)), str(year))
    split = "train" if train else "val"
    data_path = f"{root}/{split}{year}"
    annotation_path = f"{root}/annotations/instances_{split}{year}.json"

    if download and not os.path.isdir(data_path):
        zip_url = f"{COCO_IMAGE_ZIP_ROOT}/{split}{year}.zip"
        zip_path = os.path.join(root, "images.zip")
        annotation_url = (
            f"{COCO_ANNOTATION_ZIP_ROOT}/annotations_trainval{year}.zip"
        )
        annotation_zip_path = os.path.join(root, "annotation.zip")

        os.makedirs(root, exist_ok=True)
        print("Downloading coco dataset")

        # (url, archive destination, download message, unzip message)
        archives = (
            (
                zip_url,
                zip_path,
                "Downloading image files...",
                "Unzipping image files...",
            ),
            (
                annotation_url,
                annotation_zip_path,
                "Downloading annotations files...",
                "Unzipping annotation files...",
            ),
        )
        for url, archive_path, fetch_msg, unzip_msg in archives:
            print(fetch_msg)
            request.urlretrieve(url, archive_path)
            print(unzip_msg)
            with zipfile.ZipFile(archive_path, "r") as zip_ref:
                zip_ref.extractall(root)
    elif not os.path.isdir(root):
        raise ValueError(
            f"Coco Dataset Path {root} does not exist. Please download dataset."
        )

    yolo_preprocess = preprocessing_type == "yolo"

    def _process_annotations(img, ann):
        # process annotations
        return img, _extract_bounding_box_and_labels(img, ann, yolo_preprocess)

    def _resize_image(img, ann):
        # resize image
        return torch_functional.resize(img, (image_size, image_size)), ann

    def _image_to_tensor(img, ann):
        # Convert image to tensor
        return torch_functional.to_tensor(img), ann

    trans = [_process_annotations]

    if rand_trans:
        # add random crop, flip, and jitter to pipeline
        jitter_fn = ColorJitter(
            brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05
        )

        def _jitter_image(img, ann):
            # image color jitter
            return jitter_fn(img), ann

        trans += [
            # Random cropping as implemented in SSD paper
            ssd_random_crop_image_and_annotations,
            # random horizontal flip
            random_horizontal_flip_image_and_annotations,
            _jitter_image,
        ]

    trans += [_resize_image, _image_to_tensor]

    # Normalize image except for yolo preprocessing
    if not yolo_preprocess:

        def _normalize_image(img, ann):
            normalized = torch_functional.normalize(
                img, IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS
            )
            return normalized, ann

        trans.append(_normalize_image)

    if preprocessing_type == "ssd":
        default_boxes = default_boxes or get_default_boxes_300()

        def _encode_ssd(img, ann):
            # encode the bounding boxes and labels with the default boxes
            # -> encoded_boxes, encoded_labels, original_annotations
            return img, (*default_boxes.encode_image_box_labels(*ann), ann)

        trans.append(_encode_ssd)
    elif yolo_preprocess:

        def _encode_yolo(img, ann):
            return img, (bounding_box_and_labels_to_yolo_fmt(ann), ann)

        trans.append(_encode_yolo)

    super().__init__(
        root=data_path,
        annFile=annotation_path,
        transforms=AnnotatedImageTransforms(trans),
    )
    self._default_boxes = default_boxes
def __init__(
    self,
    root: str = default_dataset_path("voc-detection"),
    train: bool = True,
    rand_trans: bool = False,
    download: bool = True,
    year: str = "2012",
    image_size: int = 300,
    preprocessing_type: str = None,
    default_boxes: DefaultBoxes = None,
):
    """
    Initialize the parent VOC dataset with an annotated-image transform
    pipeline.

    :param root: dataset location; user-expanded and made absolute
    :param train: True selects image_set "train", False selects "val"
    :param rand_trans: True to add SSD random crop, random horizontal flip
        and color jitter to the pipeline
    :param download: forwarded to the parent as download
    :param year: forwarded to the parent as year
    :param image_size: images are resized to (image_size, image_size)
    :param preprocessing_type: one of None, "yolo" or "ssd"; "yolo" skips
        normalization and appends yolo-formatted labels, "ssd" appends labels
        encoded with the default boxes
    :param default_boxes: boxes used for "ssd" encoding; when falsy and
        preprocessing_type is "ssd", get_default_boxes_300(voc=True) is used
    :raises ValueError: if VOCDetection equals the `object` placeholder or
        preprocessing_type is not supported
    """
    if torchvision_import_error is not None:
        raise torchvision_import_error
    if VOCDetection == object:
        raise ValueError(
            "VOC is unsupported on this torchvision version, please upgrade to use"
        )

    supported_preprocessing = [None, "yolo", "ssd"]
    if preprocessing_type not in supported_preprocessing:
        raise ValueError(
            "preprocessing type {} not supported, valid values are: {}".format(
                preprocessing_type, supported_preprocessing
            )
        )

    root = os.path.abspath(os.path.expanduser(root))
    use_yolo = preprocessing_type == "yolo"

    def _process_annotations(img, ann):
        # process annotations
        return img, _extract_bounding_box_and_labels(img, ann)

    def _resize_image(img, ann):
        # resize image
        return F.resize(img, (image_size, image_size)), ann

    def _image_to_tensor(img, ann):
        # Convert image to tensor
        return F.to_tensor(img), ann

    trans = [_process_annotations]

    if rand_trans:
        # add random crop, flip, and jitter to pipeline
        jitter_fn = ColorJitter(
            brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05
        )

        def _jitter_image(img, ann):
            # image color jitter
            return jitter_fn(img), ann

        trans += [
            # Random cropping as implemented in SSD paper
            ssd_random_crop_image_and_annotations,
            # random horizontal flip
            random_horizontal_flip_image_and_annotations,
            _jitter_image,
        ]

    trans += [_resize_image, _image_to_tensor]

    # Normalize image except for yolo preprocessing
    if not use_yolo:

        def _normalize_image(img, ann):
            return F.normalize(img, IMAGENET_RGB_MEANS, IMAGENET_RGB_STDS), ann

        trans.append(_normalize_image)

    if preprocessing_type == "ssd":
        default_boxes = default_boxes or get_default_boxes_300(voc=True)

        def _encode_ssd(img, ann):
            # encode the bounding boxes and labels with the default boxes
            # -> encoded_boxes, encoded_labels, original_annotations
            return img, (*default_boxes.encode_image_box_labels(*ann), ann)

        trans.append(_encode_ssd)
    elif use_yolo:

        def _encode_yolo(img, ann):
            return img, (bounding_box_and_labels_to_yolo_fmt(ann), ann)

        trans.append(_encode_yolo)

    image_set = "train" if train else "val"
    super().__init__(
        root,
        year=year,
        image_set=image_set,
        download=download,
        transforms=AnnotatedImageTransforms(trans),
    )
    self._default_boxes = default_boxes