Ejemplo n.º 1
0
def get_CIFAR10_C(split='benchmark',
                  severity=1,
                  batch_size=None,
                  shuffle=False,
                  augm_type='none',
                  cutout_window=16,
                  num_workers=2,
                  size=32,
                  config_dict=None):
    """Build a DataLoader over the corrupted CIFAR-10 dataset (CIFAR-10-C).

    Args:
        split: Forwarded unchanged to ``CIFARCorrupted`` as ``split``.
        severity: Forwarded unchanged to ``CIFARCorrupted`` as ``severity``.
        batch_size: Batch size of the loader. ``None`` selects
            ``DEFAULT_TEST_BATCHSIZE``.
        shuffle: Forwarded to the DataLoader.
        augm_type: Augmentation type handed to ``get_cifar10_augmentation``.
        cutout_window: Cutout window handed to ``get_cifar10_augmentation``.
        num_workers: Number of DataLoader workers.
        size: Handed to ``get_cifar10_augmentation`` as ``out_size``.
        config_dict: Optional dict. When given, it is filled in place with a
            description of the dataset, batch size and augmentation.

    Returns:
        A ``torch.utils.data.DataLoader`` over the ``CIFARCorrupted`` dataset.
    """
    # Identity comparison is the correct way to test for the None sentinel.
    if batch_size is None:
        batch_size = DEFAULT_TEST_BATCHSIZE

    # The augmentation factory records its own settings in this dict.
    augm_config = {}
    transform = get_cifar10_augmentation(type=augm_type,
                                         cutout_window=cutout_window,
                                         out_size=size,
                                         config_dict=augm_config)

    path = get_CIFAR10_C_path()
    dataset = CIFARCorrupted(path,
                             split=split,
                             severity=severity,
                             transform=transform)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle,
                                         num_workers=num_workers)

    if config_dict is not None:
        config_dict['Dataset'] = 'Cifar10-C'
        # NOTE(review): the key is spelled 'Batch out_size' here; it is kept
        # unchanged because consumers of the config may rely on it - confirm
        # before renaming.
        config_dict['Batch out_size'] = batch_size
        config_dict['Augmentation'] = augm_config

    return loader
Ejemplo n.º 2
0
def get_noise_dataset(length,
                      type='normal',
                      batch_size=128,
                      augm_type='none',
                      cutout_window=32,
                      num_workers=8,
                      size=32,
                      config_dict=None):
    """Create a non-shuffled DataLoader over a ``NoiseDataset``.

    Args:
        length: Passed to ``NoiseDataset`` as its first argument and recorded
            under ``'Length'`` in ``config_dict``.
        type: Noise type, passed to ``NoiseDataset`` and recorded under
            ``'Noise Type'``.
        batch_size: Batch size of the loader.
        augm_type: Augmentation type handed to ``get_cifar10_augmentation``.
        cutout_window: Cutout window handed to ``get_cifar10_augmentation``.
        num_workers: Number of DataLoader workers.
        size: Used as both ``in_size`` and ``out_size`` of the augmentation
            and passed to ``NoiseDataset``.
        config_dict: Optional dict that is filled in place with the loader
            configuration when provided.

    Returns:
        A ``torch.utils.data.DataLoader`` with ``shuffle=False``.
    """
    # Filled by the augmentation factory with the settings it applied.
    augm_settings = {}
    noise_transform = get_cifar10_augmentation(
        type=augm_type,
        cutout_window=cutout_window,
        out_size=size,
        in_size=size,
        config_dict=augm_settings,
    )

    noise_loader = torch.utils.data.DataLoader(
        NoiseDataset(length, type, size, noise_transform),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )

    # Nothing to record when the caller did not ask for a config summary.
    if config_dict is None:
        return noise_loader

    summary = (
        ('Dataset', 'NoiseData'),
        ('Length', length),
        ('Noise Type', type),
        ('Batch size', batch_size),
        ('Augmentation', augm_settings),
    )
    for key, value in summary:
        config_dict[key] = value

    return noise_loader
Ejemplo n.º 3
0
def get_80MTinyImages(batch_size=100, augm_type='default', shuffle=True, cutout_window=16, num_workers=1,
                      size=32, exclude_cifar=False, exclude_cifar10_1=False, config_dict=None):
    """Build a DataLoader over the 80 Million Tiny Images dataset.

    Args:
        batch_size: Batch size of the loader.
        augm_type: Augmentation type handed to ``get_cifar10_augmentation``.
        shuffle: Forwarded to the DataLoader.
        cutout_window: Cutout window handed to ``get_cifar10_augmentation``.
        num_workers: Number of DataLoader workers.
        size: Handed to ``get_cifar10_augmentation`` as ``out_size``.
        exclude_cifar: Forwarded to ``TinyImagesDataset`` as ``exclude_cifar``.
        exclude_cifar10_1: Forwarded to ``TinyImagesDataset`` as
            ``exclude_cifar10_1``.
        config_dict: Optional dict that is filled in place with the loader
            configuration when provided.

    Returns:
        A ``torch.utils.data.DataLoader`` over ``TinyImagesDataset``.
    """
    # NOTE: a guard that raised ValueError('Bug in the current multithreaded
    # tinyimages implementation') for num_workers > 1 had been commented out,
    # leaving only a no-op check. Multi-worker loading is therefore permitted;
    # the no-op has been removed.

    # The augmentation factory records its own settings in this dict.
    augm_config = {}
    transform = get_cifar10_augmentation(augm_type, cutout_window=cutout_window, out_size=size, config_dict=augm_config)

    dataset_out = TinyImagesDataset(transform,
                                    exclude_cifar=exclude_cifar, exclude_cifar10_1=exclude_cifar10_1)

    loader = torch.utils.data.DataLoader(dataset_out, batch_size=batch_size,
                                         shuffle=shuffle, num_workers=num_workers)

    if config_dict is not None:
        config_dict['Dataset'] = '80M Tiny Images'
        config_dict['Shuffle'] = shuffle
        config_dict['Batch out_size'] = batch_size
        config_dict['Exclude CIFAR'] = exclude_cifar
        config_dict['Exclude CIFAR10.1'] = exclude_cifar10_1
        config_dict['Augmentation'] = augm_config

    return loader
Ejemplo n.º 4
0
def get_LSUN_scenes(split='train',
                    samples_per_class=None,
                    batch_size=None,
                    shuffle=None,
                    augm_type='none',
                    augm_class='imagenet',
                    num_workers=8,
                    size=224,
                    config_dict=None):
    """Build a DataLoader over the LSUN scenes dataset.

    Args:
        split: Passed to ``datasets.LSUN`` as ``classes``.
        samples_per_class: If ``None`` the whole dataset is used. Otherwise
            only the first ``samples_per_class`` items of every class are
            drawn, in random order, through a ``SubsetRandomSampler``.
        batch_size: Batch size of the loader. ``None`` selects
            ``DEFAULT_TEST_BATCHSIZE``.
        shuffle: Forwarded to the DataLoader; only used when
            ``samples_per_class`` is ``None``.
        augm_type: Augmentation type handed to ``get_imageNet_augmentation``.
        augm_class: Augmentation family; only ``'imagenet'`` is supported.
        num_workers: Number of DataLoader workers.
        size: Handed to the augmentation factory as ``out_size``.
        config_dict: Accepted for signature parity with the other loaders;
            this function does not write to it.

    Returns:
        A ``DataLoader`` over the LSUN dataset.

    Raises:
        NotImplementedError: If ``augm_class`` is anything other than
            ``'imagenet'`` (this includes ``'cifar'``).
    """
    if batch_size is None:
        batch_size = DEFAULT_TEST_BATCHSIZE

    # The augmentation factory records its own settings in this dict.
    augm_config = {}

    if augm_class != 'imagenet':
        # The 'cifar' branch raised before reaching its augmentation call,
        # so that call was unreachable and has been removed.
        raise NotImplementedError(
            "augm_class '{}' is not supported".format(augm_class))

    transform = get_imageNet_augmentation(type=augm_type,
                                          out_size=size,
                                          config_dict=augm_config)

    path = get_LSUN_scenes_path()
    dataset = datasets.LSUN(path, classes=split, transform=transform)

    if samples_per_class is None:
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=shuffle,
                            num_workers=num_workers)

    else:
        num_classes = len(dataset.dbs)
        idcs = torch.zeros(num_classes, samples_per_class, dtype=torch.long)
        # presumably dataset.indices[i] is the cumulative end offset of class
        # i, i.e. the first index of class i + 1 - confirm against the LSUN
        # dataset implementation in use.
        # NOTE(review): no check that every class actually holds at least
        # samples_per_class items.
        start_idx = 0
        for i in range(num_classes):
            idcs[i, :] = torch.arange(start_idx, start_idx + samples_per_class)
            start_idx = dataset.indices[i]
        idcs = idcs.view(-1).numpy()
        sampler = SubsetRandomSampler(idcs)
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            sampler=sampler,
                            num_workers=num_workers)

    return loader
def get_CIFAR10_ti_500k(train=True,
                        batch_size=None,
                        augm_type='default',
                        fraction=0.5,
                        size=32,
                        config_dict=None):
    """Build a semi-supervised DataLoader for CIFAR-10 plus pseudo-labeled data.

    The auxiliary data is read from
    ``cifar10_ti_500k/ti_500K_pseudo_labeled.pickle`` below the base data
    directory.

    Args:
        train: Only selects the default batch size
            (``DEFAULT_TRAIN_BATCHSIZE`` vs. ``DEFAULT_TEST_BATCHSIZE``); the
            underlying dataset is always created with ``train=True``.
        batch_size: Batch size handed to ``SemiSupervisedSampler``. ``None``
            selects a default depending on ``train``.
        augm_type: Augmentation type handed to ``get_cifar10_augmentation``.
        fraction: Passed to ``SemiSupervisedSampler`` after the batch size -
            presumably the unsupervised share of each batch; confirm against
            the sampler.
        size: Handed to ``get_cifar10_augmentation`` as ``out_size``.
        config_dict: Optional dict that is filled in place with the loader
            configuration when provided.

    Returns:
        A ``torch.utils.data.DataLoader`` driven by a
        ``SemiSupervisedSampler`` batch sampler.
    """
    # Identity comparison is the correct way to test for the None sentinel.
    if batch_size is None:
        if train:
            batch_size = DEFAULT_TRAIN_BATCHSIZE
        else:
            batch_size = DEFAULT_TEST_BATCHSIZE

    # The augmentation factory records its own settings in this dict.
    augm_config = {}
    transform = get_cifar10_augmentation(type=augm_type,
                                         out_size=size,
                                         config_dict=augm_config)

    root = get_base_data_dir()
    trainset = SemiSupervisedDataset(
        base_dataset='cifar10',
        add_svhn_extra=False,
        root=root,
        train=True,
        download=True,
        transform=transform,
        aux_data_filename='cifar10_ti_500k/ti_500K_pseudo_labeled.pickle',
        add_aux_labels=True,
        aux_take_amount=None)

    # num_batches = ceil(50000 / batch_size) enforces the definition of an
    # "epoch" as passing through 50K datapoints.
    # TODO: make sure that this code works also when trainset.unsup_indices=[]
    train_batch_sampler = SemiSupervisedSampler(
        trainset.sup_indices,
        trainset.unsup_indices,
        batch_size,
        fraction,
        num_batches=int(np.ceil(50000 / batch_size)))

    kwargs = {'num_workers': 1, 'pin_memory': True}
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_sampler=train_batch_sampler, **kwargs)

    if config_dict is not None:
        config_dict['Dataset'] = 'UnlabeledDataCifar10'
        config_dict['Batch out_size'] = batch_size
        config_dict['fraction'] = fraction
        config_dict['Augmentation'] = augm_config

    return train_loader
Ejemplo n.º 6
0
def get_TinyImageNet(split,
                     batch_size=None,
                     shuffle=None,
                     augm_type='none',
                     cutout_window=32,
                     num_workers=8,
                     size=64,
                     config_dict=None):
    """Build a DataLoader over the TinyImageNet dataset.

    Args:
        split: Dataset split, forwarded to ``TinyImageNet``. The value
            ``'train'`` additionally selects the training defaults for
            ``batch_size`` and ``shuffle``.
        batch_size: Batch size of the loader. ``None`` selects
            ``DEFAULT_TRAIN_BATCHSIZE`` for the train split and
            ``DEFAULT_TEST_BATCHSIZE`` otherwise.
        shuffle: Whether to shuffle. ``None`` means shuffle only for the
            train split.
        augm_type: Augmentation type handed to ``get_cifar10_augmentation``.
        cutout_window: Cutout window handed to ``get_cifar10_augmentation``.
        num_workers: Number of DataLoader workers.
        size: Handed to ``get_cifar10_augmentation`` as ``out_size``; the
            input size is fixed at 64.
        config_dict: Optional dict that is filled in place with the loader
            configuration when provided.

    Returns:
        A ``torch.utils.data.DataLoader`` over the ``TinyImageNet`` dataset.
    """
    is_train = split == 'train'

    # Identity comparison is the correct way to test for the None sentinel.
    if batch_size is None:
        batch_size = DEFAULT_TRAIN_BATCHSIZE if is_train else DEFAULT_TEST_BATCHSIZE

    # The augmentation factory records its own settings in this dict.
    augm_config = {}
    transform = get_cifar10_augmentation(type=augm_type,
                                         cutout_window=cutout_window,
                                         out_size=size,
                                         in_size=64,
                                         config_dict=augm_config)

    if shuffle is None:
        shuffle = is_train

    path = get_tiny_imagenet_path()
    dataset = TinyImageNet(path, split, transform_base=transform)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle,
                                         num_workers=num_workers)

    if config_dict is not None:
        config_dict['Dataset'] = 'TinyImageNet'
        config_dict['Batch size'] = batch_size
        config_dict['Augmentation'] = augm_config

    return loader