def imagenette_dataloaders(config):
    transforms_train = transforms.Compose([transforms.Resize((224,224)),
                                           transforms.RandomCrop(224, padding=28),
                                           transforms.RandomHorizontalFlip(),
                                           transforms.ToTensor(),
                                           transforms.Normalize((0.4625, 0.4580, 0.4295),(0.3901, 0.3880, 0.4042))
                                           ])

    transforms_test = transforms.Compose([transforms.Resize((224,224)),
                                          transforms.ToTensor(),
                                          transforms.Normalize((0.4625, 0.4580, 0.4295),(0.3901, 0.3880, 0.4042))
                                          ])

    train_set = datasets.DatasetFolder(root='./data/imagenette2/train', loader=image_loader,
                                    is_valid_file=is_valid_file, transform=transforms_train)

    test_set = datasets.DatasetFolder(root='./data/imagenette2/val', loader=image_loader,
                                    is_valid_file=is_valid_file, transform=transforms_test)

    train_loader = DataLoader(train_set, 
                                batch_size = config['batch_size'],
                                shuffle = True, 
                                pin_memory = True,
                                num_workers = 8,
                                drop_last = False)

    test_loader = DataLoader(test_set,
                                batch_size = config['test_batch_size'],
                                shuffle=False,
                                pin_memory = True,
                                num_workers = 8,
                                drop_last = False)

    return train_loader, test_loader
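The function above passes an image_loader callable and an is_valid_file filter to DatasetFolder without showing them. A minimal sketch of what such helpers could look like (the RGB conversion and the accepted extensions are assumptions, not taken from the source):

from PIL import Image

def image_loader(path):
    # Hypothetical loader: open the file and force 3-channel RGB so that
    # ToTensor/Normalize always receive a 3xHxW image.
    with open(path, 'rb') as f:
        return Image.open(f).convert('RGB')

def is_valid_file(path):
    # Hypothetical filter: accept the JPEG files shipped with Imagenette.
    return path.lower().endswith(('.jpg', '.jpeg'))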
def build_dataloaders(train_dir, test_dir, batch_size):

    train_softmax_dataset = datasets.DatasetFolder(train_dir,
                                                   loader=load_images_softmax,
                                                   extensions=(".mp3", ))
    train_softmax_loader = torch.utils.data.DataLoader(train_softmax_dataset,
                                                       batch_size=batch_size,
                                                       shuffle=True)

    test_softmax_dataset = datasets.DatasetFolder(test_dir,
                                                  loader=load_images_softmax,
                                                  extensions=(".mp3", ))
    test_softmax_loader = torch.utils.data.DataLoader(test_softmax_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False)

    train_softmax_loader.idx_to_class = {
        i: c
        for c, i in train_softmax_dataset.class_to_idx.items()
    }
    test_softmax_loader.idx_to_class = {
        i: c
        for c, i in test_softmax_dataset.class_to_idx.items()
    }

    return train_softmax_loader, test_softmax_loader
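build_dataloaders above reads .mp3 files through a load_images_softmax loader that is not shown. One plausible stand-in decodes each mp3 into a log-mel spectrogram with torchaudio; both the use of torchaudio and the spectrogram parameters are assumptions about the original code:

import torch
import torchaudio

# Assumes roughly 22.05 kHz audio; the real loader may use different settings.
_mel = torchaudio.transforms.MelSpectrogram(sample_rate=22050, n_mels=128)

def load_images_softmax(path):
    # Decode the mp3 to a (channels, frames) waveform, mix down to mono,
    # and return a log-compressed mel spectrogram as an image-like tensor.
    waveform, sample_rate = torchaudio.load(path)
    spec = _mel(waveform.mean(dim=0, keepdim=True))
    return torch.log1p(spec)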
Example #3
def load_data(dataset_dir, batch_size, num_workers):
    if args.evaluation:
        test_dataset = datasets.DatasetFolder(root=os.path.join(
            dataset_dir, 'test'),
                                              loader=pickle_loader,
                                              extensions=['pkl'])
        test_dataset_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers)
        return test_dataset_loader, len(test_dataset)
    else:
        train_val_dataset = {
            tag: datasets.DatasetFolder(root=os.path.join(dataset_dir, tag),
                                        loader=pickle_loader,
                                        extensions=['pkl'])
            for tag in ['train', 'val']
        }
        train_val_dataset_size = {
            tag: len(train_val_dataset[tag])
            for tag in ['train', 'val']
        }
        train_val_dataset_loader = {
            tag: torch.utils.data.DataLoader(train_val_dataset[tag],
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=num_workers)
            for tag in ['train', 'val']
        }
        return train_val_dataset_loader, train_val_dataset_size
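load_data above (and several later examples) hands a pickle_loader to DatasetFolder. A minimal version, matching the one defined in a later example in this listing, simply unpickles one sample per file:

import pickle

def pickle_loader(path):
    # Read one pre-serialized sample (e.g. a feature or logit tensor) from disk.
    with open(path, 'rb') as f:
        return pickle.load(f)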
Example #4
def generate_dataset(data_path,
                     loader,
                     extensions,
                     train_transform,
                     test_transform,
                     mode=None):
    if not mode:
        #    train_path = os.path.join(data_path, 'Kaggle2015+2019_prep_train_sigma10')
        train_path = os.path.join(data_path, 'Kaggle2015_prep_train_sigma10')

        #        train_path = os.path.join(data_path, 'ISBI2020_prep_Train_sigma10')
        test_path = os.path.join(data_path, 'ISBI2020_prep_Test_sigma10')

        train_dataset = datasets.DatasetFolder(train_path,
                                               loader,
                                               extensions,
                                               transform=train_transform)
        test_dataset = datasets.DatasetFolder(test_path,
                                              loader,
                                              extensions,
                                              transform=test_transform)

        return train_dataset, test_dataset

    else:
        test_path = os.path.join(data_path, 'ISBI2020_prep_Test_sigma10')
        test_dataset = datasets.DatasetFolder(test_path,
                                              loader,
                                              extensions,
                                              transform=test_transform)

        return test_dataset
Example #5
def main(train_path, test_path):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    torch.cuda.set_device(0)
    cudnn.benchmark = True
    cudnn.enabled = True

    net = simpleCNN()
    net = net.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    print('loading dataset')
    train_data = dset.DatasetFolder(train_path,
                                    loader, ['ext'],
                                    transform=transform)
    test_data = dset.DatasetFolder(test_path,
                                   loader, ['ext'],
                                   transform=transform)
    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=64,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(test_data,
                                              batch_size=64,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    print('training')
    for epoch in range(100):
        running_loss = 0.0
        train_total, train_correct = 0, 0
        valid_total, valid_correct = 0, 0
        for step, (input, labels) in enumerate(train_queue):
            input = input[0].cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            outputs = net(input)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            train_correct += (predicted == labels).sum().item()
        for step, (input, labels) in enumerate(valid_queue):
            input = input[0].cuda()
            labels = labels.cuda()
            outputs = net(input)
            valid_total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            valid_correct += (predicted == labels).sum().item()
        print('at epoch %d: train_acc: %f, test_acc: %f' %
              (epoch, float(train_correct) / train_total,
               float(valid_correct) / valid_total))
def get_dataloaders(**kwargs):
    path = kwargs.get('path')
    path_train = os.path.join(path, 'train')
    path_test = os.path.join(path, 'test')

    data = pd.read_csv(os.path.join(path, 'train_labels.csv'))
    train_df = data.set_index('id')
    train_keys = train_df.index.values
    train_labels = np.asarray(train_df['label'].values)
    train_labels_dict = {
        train_keys[i]: train_labels[i]
        for i in range(len(train_keys))
    }

    general_utilities.create_labeled_dataset_folder(path_train,
                                                    train_labels_dict)

    transforms_dict = {}
    for phase in ['train', 'test', 'val']:
        transforms_dict[phase] = [
            getattr(transforms, t['name'])(**t.get('args', {}))
            for t in kwargs.get('transforms', {}).get(phase, [])
        ] + [transforms.ToTensor()]

    dataset_train = datasets.DatasetFolder(
        path_train,
        loader=dataset_folder.default_loader,
        extensions=['tif'],
        transform=transforms.Compose(transforms_dict['train']),
        target_transform=lambda xxx: torch.FloatTensor([xxx]))
    dataset_val = datasets.DatasetFolder(
        path_train,
        loader=dataset_folder.default_loader,
        extensions=['tif'],
        transform=transforms.Compose(transforms_dict['val']),
        target_transform=lambda xxx: torch.FloatTensor([xxx]))
    dataset_test = nn_utilities.ImageFolderWithPaths(
        path_test, transform=transforms.Compose(transforms_dict['test']))

    dataset_train_size = len(dataset_train)
    subset_samplers = nn_utilities.get_subset_dataset_sampler(
        kwargs['ratio'], range(dataset_train_size))

    dataloaders = {
        'train':
        torch.utils.data.DataLoader(dataset_train,
                                    sampler=subset_samplers['train'],
                                    **kwargs['args']),
        'val':
        torch.utils.data.DataLoader(dataset_val,
                                    sampler=subset_samplers['val'],
                                    **kwargs['args']),
        'test':
        torch.utils.data.DataLoader(dataset_test,
                                    **dict(kwargs['args'], shuffle=False))
    }
    return dataloaders
Example #7
    def setUp(self):
        super().setUp()
        # parse command line and run
        parser = prepare_parser()
        parser = add_fid_parser(parser)
        config = vars(parser.parse_args())
        config = EasyDict(config)
        self.args = config

        self.transform = transforms.Compose([
            transforms.Resize((299, 299), Image.ANTIALIAS),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            #transforms.Normalize((0.0, 0.0, 0.0), (2.0, 2.0, 2.0)),
            #transforms.ToPILImage(),
        ])

        extensions = self.args.extensions.split(',')
        extensions += self.args.extensions.upper().split(',')
        extensions = tuple(extensions)
        self.reals_set = datasets.DatasetFolder(self.args.reals,
                                                transform=self.transform,
                                                target_transform=None,
                                                extensions=extensions,
                                                loader=pil_loader)
        self.fakes_set = datasets.DatasetFolder(
            self.args.fakes,
            transform=self.transform,
            target_transform=None,
            extensions=extensions,
            loader=pil_loader) if self.args.fakes else None
        self.reals_loader = DataLoader(dataset=self.reals_set,
                                       num_workers=self.args.num_workers,
                                       batch_size=self.args.batch_size,
                                       shuffle=True)
        self.fakes_loader = DataLoader(
            dataset=self.fakes_set,
            num_workers=self.args.num_workers,
            batch_size=self.args.batch_size,
            shuffle=True) if self.args.fakes else None
        self.sess = self.cached_session(interactive=True)
        self.model = inception_utils.WrapInception(
            inception.Inception3().eval(), resize_mode=None)
        self.saver = tf.train.Saver(var_list=tf.global_variables())
        self.saver.restore(self.sess, 'gs://ml-euw4/models/inception_v3.ckpt')
        self.batch_size = 8
        self.reals_infeed = ImageQueue(
            batch_size=self.batch_size,
            transform=lambda image: self.model(image)[0],
            shuffle=False)
        self.fakes_infeed = ImageQueue(
            batch_size=self.batch_size,
            transform=lambda image: self.model(image)[0],
            shuffle=False) if self.args.fakes else None
Example #8
        def loaders(transform, batch_size):
            def png_reader(fname):
                im = np.float32(imageio.imread(fname))  # 640x480
                im = im[:400]  # 640 x 400
                im -= im.mean()
                impl = Image.fromarray(
                    im / 8192.0)  # convert to PIL with range roughly [-1,1]
                return impl.resize((320, 200), Image.BILINEAR)  # 320 x 200

            def rgb_reader(fname):
                im = np.float32(imageio.imread(fname))  # 1280 x 800
                im = np.dot(im[..., :3], [0.299, 0.587, 0.114])  # to grayscale
                im -= im.mean()
                impl = Image.fromarray(im / 128.0)  # roughly to [-1,1]
                return impl.resize((320, 200), Image.BILINEAR)  # 320 x 200

            def _init_fn(worker_id):
                seed = 12 + worker_id
                np.random.seed(seed)
                torch.manual_seed(seed)

            # the dataset indices are shuffled by the main process
            torch.manual_seed(int(time.time()))
            np.random.seed(int(time.time()))  # init randomly each time

            rgb_set = datasets.DatasetFolder(os.path.join(path, 'RGB'),
                                             loader=rgb_reader,
                                             extensions=['.jpg'],
                                             transform=transform)

            fir_set = datasets.DatasetFolder(os.path.join(path, 'FIR'),
                                             loader=png_reader,
                                             extensions=['.png'],
                                             transform=transform)

            rgb_loader = DataLoader(
                rgb_set,
                shuffle=True,
                batch_size=batch_size,
                num_workers=num_workers,
                drop_last=True,
                worker_init_fn=_init_fn,
                collate_fn=collate_fn)  # pin_memory=(gpucount>1)

            fir_loader = DataLoader(
                fir_set,
                shuffle=True,
                batch_size=batch_size,
                num_workers=num_workers,
                drop_last=True,
                worker_init_fn=_init_fn,
                collate_fn=collate_fn)  #pin_memory=(gpucount>1)

            return {'RGB': rgb_loader, 'FIR': fir_loader}
Example #9
def mnist_custom_split(split_ratio=0.8,
                       random_seed=0,
                       shuffle_dataset=True,
                       dataset='mnist'):
    """
    Returns two torch.utils.data.SubsetRandomSamplers for split_ratio part of
    the dataset and the 1 - split_ratio part of the dataset.

    Args:
        split_ratio (float): How much is the split of the dataset
        random_seed (int): The seed of the shuffling of the dataset. By default,
            we shuffle the dataset and then pick split_ratio*dataset samples

    Returns:
        tuple of torch.utils.data.SubsetRandomSamplers: (sampler_1, sampler_2)
            where sampler_1 randomly (acc to seed) selects split_ratio *
            size(dataset) and sampler_2 randomly (according to seed) selects (1
            - split_ratio) * size(dataset).
    """
    if dataset[:5] == 'mnist':
        dataset = datasets.MNIST(definitions.DATA_PATH)
    elif dataset[:6] == 'hmnist':
        dataset = datasets.DatasetFolder(definitions.HMNIST_DATA_FOLDER,
                                         data_loader, ALL_EXTS)
    elif dataset[:8] == 'diamonds':
        dataset = datasets.DatasetFolder(definitions.DIAMONDS_DATA_FOLDER,
                                         data_loader, ALL_EXTS)
    else:
        print('[ERROR] Unknown dataset for split_and_train! => %s' % dataset)
        exit(1)

    dataset_size = len(dataset)

    indices = list(range(dataset_size))
    split = int(np.floor(split_ratio * dataset_size))
    logger.debug('Split dataset {}'.format(split))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    # An earlier version mistakenly swapped the two slices:
    # train_indices, val_indices = indices[split:], indices[:split]
    train_indices, val_indices = indices[:split], indices[split:]

    # Creating PT data samplers and loaders:
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    valid_sampler = torch.utils.data.SubsetRandomSampler(val_indices)

    return train_sampler, valid_sampler
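The two samplers returned by mnist_custom_split are intended to be passed to DataLoaders through the sampler argument (with shuffle left unset). A small self-contained sketch of that pattern; the toy TensorDataset is for illustration only:

import torch
from torch.utils.data import DataLoader, TensorDataset, SubsetRandomSampler

# Any map-style dataset works; each sampler draws only its own indices.
toy_data = TensorDataset(torch.randn(10, 3), torch.arange(10))
train_loader = DataLoader(toy_data, batch_size=4,
                          sampler=SubsetRandomSampler(list(range(8))))
valid_loader = DataLoader(toy_data, batch_size=4,
                          sampler=SubsetRandomSampler([8, 9]))
for x, y in train_loader:
    print(x.shape, y)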
Example #10
def data_loader(data_dir, batch_size=20, valid_size=0.2):
    def npy_loader(img_path):
        sample = torch.from_numpy(np.load(img_path))
        return sample

    dataset = datasets.DatasetFolder(root=data_dir,
                                     loader=npy_loader,
                                     extensions=('.npy', ))
    # number of subprocesses to use for data loading
    num_workers = 0
    # how many samples per batch to load

    num_train = len(dataset)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders (combine dataset and sampler)
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)

    return train_loader, valid_loader, dataset.classes
Example #11
def load_dataset():
    '''
    Load the npy file dataset given in dataroot, put it in a dataloader and
    select the device to run the training
    : return : loaded dataset, dataloader, device to run training
    '''
    # Create the dataset
    dataset = dset.DatasetFolder(root=dataroot,
                                 transform=transforms.Compose(
                                     [transforms.Lambda(dense_to_one_hot)]),
                                 loader=npy_loader,
                                 extensions=['.npy'])
    # Create the dataloader
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers)

    # Decide which device we want to run on
    device = torch.device("cuda:0" if (
        torch.cuda.is_available() and ngpu > 0) else "cpu")

    # Plot some training images
    real_batch = next(iter(dataloader))
    display_city(real_batch[0][0], win_name='Training Example')

    return dataset, dataloader, device
Example #12
    def __init__(self, dataset=None, batch_size=128, shuffle=False, validation_split=0.0,
                 weighted_sample=False, num_workers=1, data_dir="data/processed", training=True):
        self.dataset = dataset
        self.data_dir = data_dir
        if dataset is not None:
            self.vertical_crop = dataset.vertical_crop
            self.horizontal_crop = dataset.horizontal_crop
            if dataset.mode == 'xeno':
                # Stack of numpy melspecs -> one torch melspec
                #self.horizontal_crop = dataset.horizontal_crop - 1
                trsfm = transforms.Compose([
                    RandomImage(dataset.split_files, self.horizontal_crop),
                    #Superimpose(self.dataset, dataset.split_files, self.horizontal_crop),
                    NormalizeLabels(),
                    ThreeChannel(),
                    NumpyStackToTensors()
                    #transforms.RandomCrop(size=(self.vertical_crop, self.horizontal_crop), pad_if_needed=True, padding_mode='constant')
                ])
            else:
                trsfm = transforms.Compose([
                    # RandomImage(),
                    ThreeChannel(),
                    AxisOrderChange(),
                    NumpyStackToTensors(),
                    Crop()
                    #transforms.ToTensor(),
                    #transforms.RandomCrop(size=(self.vertical_crop, self.horizontal_crop), pad_if_needed=True, padding_mode='constant')
                ])
            dataset.set_transform(trsfm)
        else:
            self.vertical_crop = 128
            self.horizontal_crop = 281
            # No dataset was passed in: build one from the pickled samples on disk.
            self.dataset = datasets.DatasetFolder(root=self.data_dir,
                                                  loader=self.default_loader,
                                                  transform=None,
                                                  extensions=('.pickle', ))
        super().__init__(self.dataset, batch_size, shuffle, validation_split,
                         weighted_sample, num_workers)
Example #13
def init_ld(root_dir, tform, batch_size, shuffle, num_workers, load_type):

    if load_type == "none":
        if os.path.exists(root_dir):
            data = datasets.ImageFolder(root=root_dir, transform=tform)
        else:
            data = None
    elif "feature" in load_type or "logit" in load_type:

        def pickle_loader(input):
            return pickle.load(open(input, 'rb'))

        target_dir = root_dir + "_%s" % (load_type)
        if os.path.exists(target_dir):
            data = datasets.DatasetFolder(root=target_dir,
                                          loader=pickle_loader,
                                          extensions=("pk", ))
        else:
            data = None
    else:
        data = None
    if data is None:
        return None
    else:
        return tc.utils.data.DataLoader(data,
                                        batch_size=batch_size,
                                        shuffle=shuffle,
                                        drop_last=False,
                                        num_workers=num_workers)
Example #14
def get_dataset(name, data_dir, size=64, lsun_categories=None):
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
    ])

    if name == 'image':
        dataset = datasets.ImageFolder(data_dir, transform)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ['npy'])
    elif name == 'cifar10':
        dataset = datasets.CIFAR10(root=data_dir,
                                   train=True,
                                   download=True,
                                   transform=transform)
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir,
                                     transform,
                                     target_transform=(lambda t: 0))
    else:
        raise NotImplementedError

    return dataset
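The 'npy' branch above relies on an npy_loader helper that this example does not define; later examples in this listing build it directly from np.load, for instance:

import numpy as np
import torch

def npy_loader(path):
    # Read an array saved with np.save and return it as a tensor for the transforms.
    return torch.from_numpy(np.load(path))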
Example #15
def get_dataset(name, data_dir, size=64, lsun_categories=None):
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
    ])

    if name == 'image':
        dataset = datasets.ImageFolder(data_dir, transform)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ('npy', ))
    elif name == 'synthetic':

        def transform(x):
            return x * 2 - 1

        data_path = os.path.join(
            data_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        data_tensor = np.load(data_path, encoding='bytes')
        data_tensor = torch.from_numpy(
            data_tensor['imgs']).unsqueeze(1).float()
        dataset = CustomTensorDataset(data_tensor,
                                      type='dsprites',
                                      transform=transform)
    else:
        raise NotImplementedError

    return dataset
def get_dataset(name,
                data_dir,
                size=64,
                lsun_categories=None,
                deterministic=False,
                transform=None):

    transform = transforms.Compose([
        t for t in [
            transforms.Resize(size),
            transforms.CenterCrop(size),
            (not deterministic) and transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            (not deterministic) and transforms.Lambda(
                lambda x: x + 1. / 128 * torch.rand(x.size())),
        ] if t is not False
    ]) if transform is None else transform

    if name == 'image':
        print('Using image labels')
        dataset = datasets.ImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'webp':
        print('Using no labels from webp')
        dataset = CachedImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ['npy'])
        nlabels = len(dataset.classes)
    elif name == 'cifar10':
        dataset = datasets.CIFAR10(root=data_dir,
                                   train=True,
                                   download=True,
                                   transform=transform)
        nlabels = 10
    elif name == 'stacked_mnist':
        dataset = StackedMNIST(data_dir,
                               transform=transforms.Compose([
                                   transforms.Resize(size),
                                   transforms.CenterCrop(size),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, ), (0.5, ))
                               ]))
        nlabels = 1000
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
        nlabels = len(dataset.classes)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir,
                                     transform,
                                     target_transform=(lambda t: 0))
        nlabels = 1
    else:
        raise NotImplementedError
    return dataset, nlabels
def get_train_dataset(dataset_name,
                      traindir,
                      vision_type,
                      colour_space,
                      other_transformations,
                      normalize,
                      target_size,
                      target_transform=None,
                      random_labels=False):
    colour_transformations = preprocessing.colour_transformation(
        vision_type, colour_space)
    chns_transformation = preprocessing.channel_transformation(
        vision_type, colour_space)

    transformations = prepare_transformations_train(
        dataset_name,
        colour_transformations,
        other_transformations,
        chns_transformation,
        normalize,
        target_size,
        random_labels=random_labels)
    if dataset_name in folder_dbs:
        if random_labels:
            train_dataset = custom_datasets.RandomImageNet(
                traindir,
                transform=transformations,
                loader=pil2numpy_loader,
                target_transform=target_transform)
        else:
            train_dataset = datasets.ImageFolder(
                traindir,
                transformations,
                loader=pil2numpy_loader,
                target_transform=target_transform,
                is_valid_file=is_image_file)
    elif dataset_name == 'cifar10':
        train_dataset = datasets.CIFAR10(traindir,
                                         train=True,
                                         download=False,
                                         transform=transformations)
    elif dataset_name == 'cifar100':
        train_dataset = datasets.CIFAR100(traindir,
                                          train=True,
                                          download=False,
                                          transform=transformations)
    elif 'wcs_lms' in dataset_name:
        data_loader_train = lambda x: npy_data_loader(x)

        train_dataset = datasets.DatasetFolder(traindir, data_loader_train,
                                               ('.npy', ), transformations)
    elif 'wcs_jpg' in dataset_name:
        train_dataset = datasets.ImageFolder(traindir,
                                             transformations,
                                             loader=pil2numpy_loader)
    else:
        sys.exit('Dataset %s is not supported.' % dataset_name)

    return train_dataset
Example #18
def prepare_det_data_loaders(trans, folder_path, ext_list, batch_size, shuffle_tag, kwargs,
                         sampler=None):
    temp = torch.utils.data.DataLoader(
        datasets.DatasetFolder(folder_path, data_loader, ext_list, transform=trans),
        batch_size=batch_size, shuffle=shuffle_tag, sampler=sampler,
        worker_init_fn=torch.manual_seed(0), **kwargs
    )
    return temp
Example #19
def get_dataset(name,
                data_dir,
                size=64,
                lsun_categories=None,
                load_in_mem=False):
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
    ])
    data_dir = os.path.expanduser(data_dir)
    if name == 'image':
        dataset = datasets.ImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'hdf5':
        from TOOLS.make_hdf5 import Dataset_HDF5
        transform = transforms.Compose([
            transforms.Lambda(lambda x: x.transpose(1, 2, 0)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
        ])
        dataset = Dataset_HDF5(root=data_dir,
                               transform=transform,
                               load_in_mem=load_in_mem)
        nlabels = len(dataset.classes)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ['npy'])
        nlabels = len(dataset.classes)
    elif name == 'cifar10':
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        dataset = datasets.CIFAR10(root=data_dir,
                                   train=True,
                                   download=True,
                                   transform=transform)
        nlabels = 10
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
        nlabels = len(dataset.classes)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir,
                                     transform,
                                     target_transform=(lambda t: 0))
        nlabels = 1
    else:
        raise NotImplementedError

    return dataset, nlabels
Example #20
    def load_data(self):
        # load benign mnist dataset
        trans = transforms.Compose([transforms.ToTensor()])
        folder_path = os.path.join(definitions.TROJAN_DIR, 'benign_mnist')
        log_str = 'Data Folder -> %s' % folder_path
        print(log_str)
        logger.debug(log_str)
        ## load test dataset
        benign_test_loader = torch.utils.data.DataLoader(
            datasets.DatasetFolder(os.path.join(folder_path, 'test'),
                                   self_data_loader, ['bin'],
                                   transform=trans),
            batch_size=self.args.test_batch_size,
            shuffle=False)
        self.benign_test_dict = data_loader2dict(benign_test_loader)

        benign_test_loader = torch.utils.data.DataLoader(
            datasets.DatasetFolder(os.path.join(folder_path, 'test'),
                                   self_data_loader, ['bin'],
                                   transform=trans),
            batch_size=self.args.test_batch_size,
            shuffle=False)
        self.trojan_test_dict = data_loader2dict(benign_test_loader)

        ## load train dataset
        benign_train_loader = torch.utils.data.DataLoader(
            datasets.DatasetFolder(os.path.join(folder_path, 'train'),
                                   self_data_loader, ['bin'],
                                   transform=trans),
            batch_size=self.args.batch_size,
            shuffle=True,
            worker_init_fn=torch.manual_seed(0))
        self.benign_train_dict = data_loader2dict(benign_train_loader)

        benign_train_loader = torch.utils.data.DataLoader(
            datasets.DatasetFolder(os.path.join(folder_path, 'train'),
                                   self_data_loader, ['bin'],
                                   transform=trans),
            batch_size=self.args.batch_size,
            shuffle=True,
            worker_init_fn=torch.manual_seed(0))
        self.trojan_train_dict = data_loader2dict(benign_train_loader)
        print('batch_size: ', self.args.batch_size)
        print('#batch: ', len(self.trojan_train_dict))
Example #21
def generate_dataset_cv(data_path, loader, extensions, train_transform,
                        test_transform):
    #    train_path = os.path.join(data_path, 'ISBI2020_prep_Mix_sigma10')
    #    train_path = os.path.join(data_path, 'ISBI2020_prep_Train_sigma10')
    train_path = os.path.join(data_path, 'Kaggle2019_prep_train_sigma10')
    train_dataset = datasets.DatasetFolder(train_path,
                                           loader,
                                           extensions,
                                           transform=train_transform)

    return train_dataset
Example #22
def make_dataset(path):
    transform = Compose([
        Resize((64, 64)),
        ToTensor(),
        Normalize(mean=[0.5, 0.5, 0.5], std=[0.55, 0.55, 0.55])
    ])
    dataset = dset.DatasetFolder(path,
                                 default_loader,
                                 extensions=('jpg', ),
                                 transform=transform)
    return dataset
    def cal_train_data(self):
        dataset = datasets.DatasetFolder(
            root=self.data_path,
            loader=lambda x: torch.from_numpy(np.load(x)),
            extensions='.npy',
            transform=MySimCLRTrainDataTransform(self.img_size))
        #dataset = datasets.ImageFolder(root=self.data_path,
        #                            transform=MySimCLRTrainDataTransform(self.img_size))
        num_train = len(dataset)
        split = int(np.floor(self.test_ratio * num_train))
        return num_train - split
Example #24
def generate_dataset(data_path, loader, extensions, train_transform,
                     test_transform):
    train_path = os.path.join(data_path, 'ISBI2020_prep_Mix_sigma10')
    #    test_path = os.path.join(data_path, 'ISBI2020_prep_Test_sigma10')
    #    val_path = os.path.join(data_path, 'val')

    train_dataset = datasets.DatasetFolder(train_path,
                                           loader,
                                           extensions,
                                           transform=train_transform)
    #    test_dataset = datasets.DatasetFolder(test_path, loader, extensions, transform=test_transform)
    #    val_dataset = datasets.DatasetFolder(val_path, loader, extensions, transform=test_transform)

    return train_dataset
def jsonloader(data_dir, img_size, batchSize):
    # Preprocessing: Resize, brightness corrections
    dataset = dset.DatasetFolder(root=data_dir,
                                 loader=None,
                                 transform=transforms.Compose([
                                     transforms.Resize(img_size),
                                     transforms.ToTensor(),
                                 ]))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batchSize,
                                             shuffle=True)

    print('Data size:', len(dataset), 'json')

    return dataloader
Example #26
def get_dataset(name, data_dir, size=64, lsun_categories=None, config=None):
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
    ])

    if name == "MoG":
        dataset = MixtureOfGaussianDataset(config)
        nlabels = 1
    elif name.lower() == "celeba":
        imgs = np.load("/home/LargeData/celebA_64x64.npy")
        labels = np.zeros([imgs.shape[0]]).astype(np.int64)
        dataset = NumpyImageDataset(imgs, labels, transform)
        nlabels = 1
    elif name == 'image':
        dataset = datasets.ImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, 'npy')
        nlabels = len(dataset.classes)
    elif name == 'cifar10':
        dataset = datasets.CIFAR10(root=data_dir,
                                   train=True,
                                   download=True,
                                   transform=transform)
        nlabels = 10
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
        nlabels = len(dataset.classes)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir,
                                     transform,
                                     target_transform=(lambda t: 0))
        nlabels = 1
    else:
        raise NotImplementedError

    return dataset, nlabels
    def val_dataloader(self):
        dataset = datasets.DatasetFolder(
            root=self.data_path,
            loader=lambda x: torch.from_numpy(np.load(x)),
            extensions='.npy',
            transform=MySimCLREvalDataTransform(self.img_size))
        num_train = len(dataset)
        indices = list(range(num_train))
        split = int(np.floor(self.test_ratio * num_train))

        val_idx = indices[:split]
        val_sampler = SubsetRandomSampler(val_idx)
        self.num_val_imgs = len(val_idx)
        self.sample_dataloader = DataLoader(dataset,
                                            num_workers=50,
                                            batch_size=144,
                                            sampler=val_sampler,
                                            drop_last=True)
        return self.sample_dataloader
Example #28
def load_observations(
    game: GymGame,
    random_split: bool,
    observations_dir,
    batch_size=32,
    drop_z_values=True,
    validation_percentage=0.1,
):
    """ Load observations from disk and return a dataset and dataloader.

    Observations are loaded from *observations_dir*. drop_z_values drops the z and
    next_z parameters from the dataset. random_split controls whether the dataset is
    split randomly into training/validation subsets or not.
    """
    def load_and_transform(filename):
        obs_dict = Observation.load_as_dict(filename)
        obs_dict["screen"] = transform(obs_dict["screen"])
        if drop_z_values:
            del obs_dict["z"]
            del obs_dict["next_z"]
        return obs_dict

    observations_dir /= game.key
    dataset = datasets.DatasetFolder(
        root=str(observations_dir),
        loader=load_and_transform,
        extensions=Observation.FILE_EXTENSION,
    )

    dataset_size = len(dataset)
    validation_size = int(dataset_size * validation_percentage)
    training_size = dataset_size - validation_size

    if random_split:
        validation_ds, training_ds = torch.utils.data.dataset.random_split(
            dataset, [validation_size, training_size])
    else:
        validation_ds = Subset(dataset, range(0, validation_size))
        training_ds = Subset(dataset, range(validation_size, dataset_size))

    validation_dl = DataLoader(validation_ds, batch_size=batch_size)
    training_dl = DataLoader(training_ds, batch_size=batch_size)
    return training_dl, validation_dl
Example #29
    def predict_samples(self, samples_dir):
        if not os.path.exists(self.saved_model):
            print('No saved model in {}'.format(self.saved_model))
            exit(1)

        self.model.load_state_dict(torch.load(self.saved_model, map_location={'cuda:0': 'cpu'}))
        self.model.eval()

        trans = transforms.Compose([transforms.ToTensor()])
        test_loader = torch.utils.data.DataLoader(
            datasets.DatasetFolder(samples_dir, data_loader, ['bin'],
                                   transform=trans), batch_size=1)

        with torch.no_grad():
            for data, _ in test_loader:
                data = Variable(data)
                output = self.model(data)
                pred = output.data.max(1, keepdim=True)[1]
                logger.debug('Input: {} - Pred : {}'.format(data, pred))
Example #30
    def predict_tandem(self, samples_dir, nn2):
        self.model.load_state_dict(
            torch.load(self.saved_model, map_location={'cuda:0': 'cpu'}))
        self.model.eval()

        trans = transforms.Compose([transforms.ToTensor()])
        test_loader = torch.utils.data.DataLoader(datasets.DatasetFolder(
            samples_dir, data_loader, ['bin'], transform=trans),
                                                  batch_size=1)
        nn2.model.load_state_dict(
            torch.load(nn2.saved_model, map_location={'cuda:0': 'cpu'}))
        nn2.model.eval()

        with torch.no_grad():
            for data, _ in test_loader:
                data = Variable(data)
                output = self.model(data)
                pred1 = output.data.max(1, keepdim=True)[1]
                output = nn2.model(data)
                pred2 = output.data.max(1, keepdim=True)[1]
                logger.debug('Input: {} - Pred 1: {} - pred 2: {}'.format(
                    data, pred1, pred2))