def imagenette_dataloaders(config):
    transforms_train = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomCrop(224, padding=28),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4625, 0.4580, 0.4295), (0.3901, 0.3880, 0.4042)),
    ])
    transforms_test = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.4625, 0.4580, 0.4295), (0.3901, 0.3880, 0.4042)),
    ])
    train_set = datasets.DatasetFolder(root='./data/imagenette2/train',
                                       loader=image_loader,
                                       is_valid_file=is_valid_file,
                                       transform=transforms_train)
    test_set = datasets.DatasetFolder(root='./data/imagenette2/val',
                                      loader=image_loader,
                                      is_valid_file=is_valid_file,
                                      transform=transforms_test)
    train_loader = DataLoader(train_set, batch_size=config['batch_size'],
                              shuffle=True, pin_memory=True, num_workers=8,
                              drop_last=False)
    test_loader = DataLoader(test_set, batch_size=config['test_batch_size'],
                             shuffle=False, pin_memory=True, num_workers=8,
                             drop_last=False)
    return train_loader, test_loader
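# The snippet above assumes two helpers that are never shown. A minimal
# sketch, assuming standard RGB image files; the names mirror the call sites,
# but the bodies are illustrative, not the original author's definitions:
from PIL import Image

IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png')

def image_loader(path):
    # Load an image file and force 3-channel RGB.
    with open(path, 'rb') as f:
        return Image.open(f).convert('RGB')

def is_valid_file(path):
    # Accept only files with a known image extension.
    return path.lower().endswith(IMG_EXTENSIONS)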
def build_dataloaders(train_dir, test_dir, batch_size):
    train_softmax_dataset = datasets.DatasetFolder(train_dir,
                                                   loader=load_images_softmax,
                                                   extensions=('.mp3', ))
    train_softmax_loader = torch.utils.data.DataLoader(train_softmax_dataset,
                                                       batch_size=batch_size,
                                                       shuffle=True)
    test_softmax_dataset = datasets.DatasetFolder(test_dir,
                                                  loader=load_images_softmax,
                                                  extensions=('.mp3', ))
    test_softmax_loader = torch.utils.data.DataLoader(test_softmax_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=False)
    # Expose an index-to-class mapping on each loader for later decoding.
    train_softmax_loader.idx_to_class = {
        i: c for c, i in train_softmax_dataset.class_to_idx.items()
    }
    test_softmax_loader.idx_to_class = {
        i: c for c, i in test_softmax_dataset.class_to_idx.items()
    }
    return train_softmax_loader, test_softmax_loader
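# load_images_softmax is not shown. Despite the name, it must map an .mp3
# path to a fixed-size tensor so samples can be batched. A hedged sketch
# using torchaudio; this is purely an assumption about what the original did:
import torch
import torchaudio

def load_images_softmax(path, n_samples=16000):
    waveform, sample_rate = torchaudio.load(path)  # (channels, frames)
    waveform = waveform.mean(dim=0)                # mix down to mono
    # Pad or truncate so every sample has the same length for batching.
    if waveform.numel() < n_samples:
        waveform = torch.nn.functional.pad(
            waveform, (0, n_samples - waveform.numel()))
    return waveform[:n_samples]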
def load_data(dataset_dir, batch_size, num_workers):
    # Note: relies on a module-level `args` for the evaluation flag.
    if args.evaluation:
        test_dataset = datasets.DatasetFolder(root=os.path.join(dataset_dir, 'test'),
                                              loader=pickle_loader,
                                              extensions=('.pkl', ))
        test_dataset_loader = torch.utils.data.DataLoader(test_dataset,
                                                          batch_size=batch_size,
                                                          shuffle=True,
                                                          num_workers=num_workers)
        return test_dataset_loader, len(test_dataset)
    else:
        train_val_dataset = {
            tag: datasets.DatasetFolder(root=os.path.join(dataset_dir, tag),
                                        loader=pickle_loader,
                                        extensions=('.pkl', ))
            for tag in ['train', 'val']
        }
        train_val_dataset_size = {
            tag: len(train_val_dataset[tag]) for tag in ['train', 'val']
        }
        train_val_dataset_loader = {
            tag: torch.utils.data.DataLoader(train_val_dataset[tag],
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=num_workers)
            for tag in ['train', 'val']
        }
        return train_val_dataset_loader, train_val_dataset_size
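# pickle_loader is assumed by the snippet above but never defined. A minimal
# sketch: unpickle one sample per file.
import pickle

def pickle_loader(path):
    with open(path, 'rb') as f:
        return pickle.load(f)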
def generate_dataset(data_path, loader, extensions, train_transform,
                     test_transform, mode=None):
    if not mode:
        # train_path = os.path.join(data_path, 'Kaggle2015+2019_prep_train_sigma10')
        train_path = os.path.join(data_path, 'Kaggle2015_prep_train_sigma10')
        # train_path = os.path.join(data_path, 'ISBI2020_prep_Train_sigma10')
        test_path = os.path.join(data_path, 'ISBI2020_prep_Test_sigma10')
        train_dataset = datasets.DatasetFolder(train_path, loader, extensions,
                                               transform=train_transform)
        test_dataset = datasets.DatasetFolder(test_path, loader, extensions,
                                              transform=test_transform)
        return train_dataset, test_dataset
    else:
        test_path = os.path.join(data_path, 'ISBI2020_prep_Test_sigma10')
        test_dataset = datasets.DatasetFolder(test_path, loader, extensions,
                                              transform=test_transform)
        return test_dataset
def main(train_path, test_path):
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    torch.cuda.set_device(0)
    cudnn.benchmark = True
    cudnn.enabled = True

    net = simpleCNN()
    net = net.cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    print('loading dataset')
    train_data = dset.DatasetFolder(train_path, loader, ('ext', ),
                                    transform=transform)
    test_data = dset.DatasetFolder(test_path, loader, ('ext', ),
                                   transform=transform)
    train_queue = torch.utils.data.DataLoader(train_data, batch_size=64,
                                              shuffle=True, pin_memory=True,
                                              num_workers=2)
    # Evaluate on the held-out test set so the reported "test_acc" really is
    # test accuracy.
    valid_queue = torch.utils.data.DataLoader(test_data, batch_size=64,
                                              shuffle=False, pin_memory=True,
                                              num_workers=2)

    print('training')
    for epoch in range(100):
        train_total, train_correct = 0, 0
        valid_total, valid_correct = 0, 0
        net.train()
        for step, (inputs, labels) in enumerate(train_queue):
            # The loader appears to yield a sequence per sample; index 0
            # selects the tensor part.
            inputs = inputs[0].cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_total += labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            train_correct += (predicted == labels).sum().item()
        net.eval()
        with torch.no_grad():
            for step, (inputs, labels) in enumerate(valid_queue):
                inputs = inputs[0].cuda()
                labels = labels.cuda()
                outputs = net(inputs)
                valid_total += labels.size(0)
                _, predicted = torch.max(outputs.data, 1)
                valid_correct += (predicted == labels).sum().item()
        print('at epoch %d: train_acc: %f, test_acc: %f' %
              (epoch, float(train_correct) / train_total,
               float(valid_correct) / valid_total))
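# The snippet closes over three undefined globals: simpleCNN, loader, and
# transform (plus a placeholder 'ext' extension). A hedged stand-in for the
# loader, consistent with the inputs[0] indexing above (each sample is a
# (tensor, metadata) pair); an assumption, not the original code:
import torch

def loader(path):
    tensor = torch.load(path)  # assumes each file holds one serialized tensor
    return tensor, path        # pair: data plus per-sample metadata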
def get_dataloaders(**kwargs):
    path = kwargs.get('path')
    path_train = os.path.join(path, 'train')
    path_test = os.path.join(path, 'test')

    # Build an {image id -> label} mapping from the Kaggle-style csv and use
    # it to arrange the raw files into class subfolders.
    data = pd.read_csv(os.path.join(path, 'train_labels.csv'))
    train_df = data.set_index('id')
    train_keys = train_df.index.values
    train_labels = np.asarray(train_df['label'].values)
    train_labels_dict = {
        train_keys[i]: train_labels[i] for i in range(len(train_keys))
    }
    general_utilities.create_labeled_dataset_folder(path_train, train_labels_dict)

    # Instantiate the configured transforms by name, then append ToTensor.
    transforms_dict = {}
    for phase in ['train', 'test', 'val']:
        transforms_dict[phase] = [
            getattr(transforms, t['name'])(**t.get('args', {}))
            for t in kwargs.get('transforms', {}).get(phase, [])
        ] + [transforms.ToTensor()]

    dataset_train = datasets.DatasetFolder(
        path_train,
        loader=dataset_folder.default_loader,
        extensions=('.tif', ),
        transform=transforms.Compose(transforms_dict['train']),
        target_transform=lambda label: torch.FloatTensor([label]))
    dataset_val = datasets.DatasetFolder(
        path_train,
        loader=dataset_folder.default_loader,
        extensions=('.tif', ),
        transform=transforms.Compose(transforms_dict['val']),
        target_transform=lambda label: torch.FloatTensor([label]))
    dataset_test = nn_utilities.ImageFolderWithPaths(
        path_test, transform=transforms.Compose(transforms_dict['test']))

    dataset_train_size = len(dataset_train)
    subset_samplers = nn_utilities.get_subset_dataset_sampler(
        kwargs['ratio'], range(dataset_train_size))
    dataloaders = {
        'train': torch.utils.data.DataLoader(dataset_train,
                                             sampler=subset_samplers['train'],
                                             **kwargs['args']),
        'val': torch.utils.data.DataLoader(dataset_val,
                                           sampler=subset_samplers['val'],
                                           **kwargs['args']),
        'test': torch.utils.data.DataLoader(dataset_test,
                                            **dict(kwargs['args'], shuffle=False))
    }
    return dataloaders
def setUp(self):
    super().setUp()
    # Parse command line and run.
    parser = prepare_parser()
    parser = add_fid_parser(parser)
    config = vars(parser.parse_args())
    config = EasyDict(config)
    self.args = config
    self.transform = transforms.Compose([
        transforms.Resize((299, 299), Image.ANTIALIAS),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        #transforms.Normalize((0.0, 0.0, 0.0), (2.0, 2.0, 2.0)),
        #transforms.ToPILImage(),
    ])
    # Accept both lower- and upper-case variants of the configured extensions.
    extensions = self.args.extensions.split(',')
    extensions += self.args.extensions.upper().split(',')
    extensions = tuple(extensions)
    self.reals_set = datasets.DatasetFolder(self.args.reals,
                                            transform=self.transform,
                                            target_transform=None,
                                            extensions=extensions,
                                            loader=pil_loader)
    self.fakes_set = datasets.DatasetFolder(
        self.args.fakes,
        transform=self.transform,
        target_transform=None,
        extensions=extensions,
        loader=pil_loader) if self.args.fakes else None
    self.reals_loader = DataLoader(dataset=self.reals_set,
                                   num_workers=self.args.num_workers,
                                   batch_size=self.args.batch_size,
                                   shuffle=True)
    self.fakes_loader = DataLoader(
        dataset=self.fakes_set,
        num_workers=self.args.num_workers,
        batch_size=self.args.batch_size,
        shuffle=True) if self.args.fakes else None
    self.sess = self.cached_session(interactive=True)
    self.model = inception_utils.WrapInception(inception.Inception3().eval(),
                                               resize_mode=None)
    self.saver = tf.train.Saver(var_list=tf.global_variables())
    self.saver.restore(self.sess, 'gs://ml-euw4/models/inception_v3.ckpt')
    self.batch_size = 8
    self.reals_infeed = ImageQueue(batch_size=self.batch_size,
                                   transform=lambda image: self.model(image)[0],
                                   shuffle=False)
    self.fakes_infeed = ImageQueue(
        batch_size=self.batch_size,
        transform=lambda image: self.model(image)[0],
        shuffle=False) if self.args.fakes else None
def loaders(transform, batch_size):
    def png_reader(fname):
        im = np.float32(imageio.imread(fname))  # 640x480
        im = im[:400]  # 640x400
        im -= im.mean()
        # Convert to PIL with range roughly [-1, 1].
        impl = Image.fromarray(im / 8192.0)
        return impl.resize((320, 200), Image.BILINEAR)  # 320x200

    def rgb_reader(fname):
        im = np.float32(imageio.imread(fname))  # 1280x800
        im = np.dot(im[..., :3], [0.299, 0.587, 0.114])  # to grayscale
        im -= im.mean()
        impl = Image.fromarray(im / 128.0)  # roughly to [-1, 1]
        return impl.resize((320, 200), Image.BILINEAR)  # 320x200

    def _init_fn(worker_id):
        seed = 12 + worker_id
        np.random.seed(seed)
        torch.manual_seed(seed)

    # The dataset indices are shuffled by the main process;
    # initialize its RNGs randomly each time.
    torch.manual_seed(int(time.time()))
    np.random.seed(int(time.time()))

    rgb_set = datasets.DatasetFolder(os.path.join(path, 'RGB'),
                                     loader=rgb_reader,
                                     extensions=('.jpg', ),
                                     transform=transform)
    fir_set = datasets.DatasetFolder(os.path.join(path, 'FIR'),
                                     loader=png_reader,
                                     extensions=('.png', ),
                                     transform=transform)
    rgb_loader = DataLoader(rgb_set,
                            shuffle=True,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            drop_last=True,
                            worker_init_fn=_init_fn,
                            collate_fn=collate_fn)  # pin_memory=(gpucount > 1)
    fir_loader = DataLoader(fir_set,
                            shuffle=True,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            drop_last=True,
                            worker_init_fn=_init_fn,
                            collate_fn=collate_fn)  # pin_memory=(gpucount > 1)
    return {'RGB': rgb_loader, 'FIR': fir_loader}
def mnist_custom_split(split_ratio=0.8, random_seed=0,
                       shuffle_dataset=True, dataset='mnist'):
    """
    Returns two torch.utils.data.SubsetRandomSamplers covering the
    split_ratio part of the dataset and the 1 - split_ratio part.

    Args:
        split_ratio (float): Fraction of the dataset in the first split.
        random_seed (int): Seed for shuffling the dataset. By default, we
            shuffle the dataset and then pick split_ratio * len(dataset)
            samples.

    Returns:
        tuple of torch.utils.data.SubsetRandomSampler: (sampler_1, sampler_2)
        where sampler_1 randomly (according to seed) selects
        split_ratio * len(dataset) samples and sampler_2 randomly (according
        to seed) selects the remaining (1 - split_ratio) * len(dataset).
    """
    if dataset[:5] == 'mnist':
        dataset = datasets.MNIST(definitions.DATA_PATH)
    elif dataset[:6] == 'hmnist':
        dataset = datasets.DatasetFolder(definitions.HMNIST_DATA_FOLDER,
                                         data_loader, ALL_EXTS)
    elif dataset[:8] == 'diamonds':
        dataset = datasets.DatasetFolder(definitions.DIAMONDS_DATA_FOLDER,
                                         data_loader, ALL_EXTS)
    else:
        print('[ERROR] Unknown dataset for split_and_train! => %s' % dataset)
        exit(1)

    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(split_ratio * dataset_size))
    logger.debug('Split dataset {}'.format(split))
    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
    # ==> Mistakes
    # train_indices, val_indices = indices[split:], indices[:split]
    train_indices, val_indices = indices[:split], indices[split:]

    # Creating PT data samplers and loaders:
    train_sampler = torch.utils.data.SubsetRandomSampler(train_indices)
    valid_sampler = torch.utils.data.SubsetRandomSampler(val_indices)
    return train_sampler, valid_sampler
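# Usage sketch: the samplers plug into DataLoader's sampler argument. Note
# that the function never returns the dataset it indexed, so the caller must
# rebuild the very same dataset for the indices to line up:
dataset = datasets.MNIST(definitions.DATA_PATH)
train_sampler, valid_sampler = mnist_custom_split(split_ratio=0.8, dataset='mnist')
train_loader = torch.utils.data.DataLoader(dataset, batch_size=64, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(dataset, batch_size=64, sampler=valid_sampler)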
def data_loader(data_dir, batch_size=20, valid_size=0.2):
    def npy_loader(img_path):
        sample = torch.from_numpy(np.load(img_path))
        return sample

    dataset = datasets.DatasetFolder(root=data_dir,
                                     loader=npy_loader,
                                     extensions=('.npy', ))

    # number of subprocesses to use for data loading
    num_workers = 0
    # split the indices into training and validation parts
    num_train = len(dataset)
    indices = list(range(num_train))
    np.random.shuffle(indices)
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders (combine dataset and sampler)
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               sampler=train_sampler,
                                               num_workers=num_workers)
    valid_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               sampler=valid_sampler,
                                               num_workers=num_workers)
    return train_loader, valid_loader, dataset.classes
def load_dataset():
    '''
    Load the npy file dataset given in dataroot, put it in a dataloader and
    select the device to run the training on.
    :return: loaded dataset, dataloader, device to run training
    '''
    # Create the dataset
    dataset = dset.DatasetFolder(root=dataroot,
                                 transform=transforms.Compose(
                                     [transforms.Lambda(dense_to_one_hot)]),
                                 loader=npy_loader,
                                 extensions=('.npy', ))
    # Create the dataloader
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=workers)
    # Decide which device we want to run on
    device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0)
                          else "cpu")
    # Plot some training images
    real_batch = next(iter(dataloader))
    display_city(real_batch[0][0], win_name='Training Example')
    return dataset, dataloader, device
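# npy_loader and dense_to_one_hot are assumed by the snippet above. A hedged
# sketch of both; the class count is a made-up placeholder:
import numpy as np
import torch

def npy_loader(path):
    return torch.from_numpy(np.load(path))

def dense_to_one_hot(sample, num_classes=10):
    # Map a tensor of integer class ids to one-hot float vectors.
    return torch.nn.functional.one_hot(sample.long(), num_classes).float()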
def __init__(self, dataset=None, batch_size=128, shuffle=False,
             validation_split=0.0, weighted_sample=False, num_workers=1,
             data_dir="data/processed", training=True):
    self.dataset = dataset
    self.data_dir = data_dir
    if dataset is not None:
        self.vertical_crop = dataset.vertical_crop
        self.horizontal_crop = dataset.horizontal_crop
        if dataset.mode == 'xeno':
            # Stack of numpy melspecs -> one torch melspec
            #self.horizontal_crop = dataset.horizontal_crop - 1
            trsfm = transforms.Compose([
                RandomImage(dataset.split_files, self.horizontal_crop),
                #Superimpose(self.dataset, dataset.split_files, self.horizontal_crop),
                NormalizeLabels(),
                ThreeChannel(),
                NumpyStackToTensors()
                #transforms.RandomCrop(size=(self.vertical_crop, self.horizontal_crop), pad_if_needed=True, padding_mode='constant')
            ])
        else:
            trsfm = transforms.Compose([
                # RandomImage(),
                ThreeChannel(),
                AxisOrderChange(),
                NumpyStackToTensors(),
                Crop()
                #transforms.ToTensor(),
                #transforms.RandomCrop(size=(self.vertical_crop, self.horizontal_crop), pad_if_needed=True, padding_mode='constant')
            ])
        dataset.set_transform(trsfm)
    else:
        self.vertical_crop = 128
        self.horizontal_crop = 281
        # Fallback: build the dataset from pickled files on disk and store it,
        # so super() does not receive None. No transform is configured for
        # this path.
        self.dataset = datasets.DatasetFolder(root=self.data_dir,
                                              loader=self.default_loader,
                                              transform=None,
                                              extensions=('.pickle', ))
    super().__init__(self.dataset, batch_size, shuffle, validation_split,
                     weighted_sample, num_workers)
def init_ld(root_dir, tform, batch_size, shuffle, num_workers, load_type):
    if load_type == "none":
        if os.path.exists(root_dir):
            data = datasets.ImageFolder(root=root_dir, transform=tform)
        else:
            data = None
    elif "feature" in load_type or "logit" in load_type:
        def pickle_loader(path):
            return pickle.load(open(path, 'rb'))

        target_dir = root_dir + "_%s" % (load_type)
        if os.path.exists(target_dir):
            data = datasets.DatasetFolder(root=target_dir,
                                          loader=pickle_loader,
                                          extensions=("pk", ))
        else:
            data = None
    else:
        data = None  # unknown load_type
    if data is None:
        return None
    else:
        return tc.utils.data.DataLoader(data,
                                        batch_size=batch_size,
                                        shuffle=shuffle,
                                        drop_last=False,
                                        num_workers=num_workers)
def get_dataset(name, data_dir, size=64, lsun_categories=None):
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        # Dequantization noise: spread each 8-bit level over its bin.
        transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
    ])
    if name == 'image':
        dataset = datasets.ImageFolder(data_dir, transform)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ('npy', ))
    elif name == 'cifar10':
        dataset = datasets.CIFAR10(root=data_dir, train=True, download=True,
                                   transform=transform)
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir, transform,
                                     target_transform=(lambda t: 0))
    else:
        raise NotImplementedError
    return dataset
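# Usage sketch for this get_dataset variant (the 'npy' branch additionally
# needs a module-level npy_loader, like the helper sketched earlier; the
# paths and sizes here are illustrative):
dataset = get_dataset('cifar10', data_dir='./data', size=32)
loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)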
def get_dataset(name, data_dir, size=64, lsun_categories=None):
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
    ])
    if name == 'image':
        dataset = datasets.ImageFolder(data_dir, transform)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ('npy', ))
    elif name == 'synthetic':
        # dSprites images are binary, so map {0, 1} to [-1, 1] directly.
        def transform(x):
            return x * 2 - 1

        data_path = os.path.join(
            data_dir, 'dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz')
        data_tensor = np.load(data_path, encoding='bytes')
        data_tensor = torch.from_numpy(data_tensor['imgs']).unsqueeze(1).float()
        dataset = CustomTensorDataset(data_tensor, type='dsprites',
                                      transform=transform)
    else:
        raise NotImplementedError
    return dataset
def get_dataset(name, data_dir, size=64, lsun_categories=None,
                deterministic=False, transform=None):
    # Build the default pipeline, dropping the random augmentations when a
    # deterministic dataset is requested; the `and` trick leaves False in the
    # list for disabled steps, which the filter below removes.
    transform = transforms.Compose([
        t for t in [
            transforms.Resize(size),
            transforms.CenterCrop(size),
            (not deterministic) and transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            (not deterministic) and transforms.Lambda(
                lambda x: x + 1. / 128 * torch.rand(x.size())),
        ] if t is not False
    ]) if transform is None else transform

    if name == 'image':
        print('Using image labels')
        dataset = datasets.ImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'webp':
        print('Using no labels from webp')
        dataset = CachedImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ('npy', ))
        nlabels = len(dataset.classes)
    elif name == 'cifar10':
        dataset = datasets.CIFAR10(root=data_dir, train=True, download=True,
                                   transform=transform)
        nlabels = 10
    elif name == 'stacked_mnist':
        dataset = StackedMNIST(data_dir,
                               transform=transforms.Compose([
                                   transforms.Resize(size),
                                   transforms.CenterCrop(size),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, ), (0.5, ))
                               ]))
        nlabels = 1000
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
        nlabels = len(dataset.classes)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir, transform,
                                     target_transform=(lambda t: 0))
        nlabels = 1
    else:
        raise NotImplementedError
    return dataset, nlabels
def get_train_dataset(dataset_name, traindir, vision_type, colour_space,
                      other_transformations, normalize, target_size,
                      target_transform=None, random_labels=False):
    colour_transformations = preprocessing.colour_transformation(
        vision_type, colour_space)
    chns_transformation = preprocessing.channel_transformation(
        vision_type, colour_space)

    transformations = prepare_transformations_train(
        dataset_name, colour_transformations, other_transformations,
        chns_transformation, normalize, target_size,
        random_labels=random_labels)
    if dataset_name in folder_dbs:
        if random_labels:
            train_dataset = custom_datasets.RandomImageNet(
                traindir,
                transform=transformations,
                loader=pil2numpy_loader,
                target_transform=target_transform)
        else:
            train_dataset = datasets.ImageFolder(
                traindir,
                transformations,
                loader=pil2numpy_loader,
                target_transform=target_transform,
                is_valid_file=is_image_file)
    elif dataset_name == 'cifar10':
        train_dataset = datasets.CIFAR10(traindir, train=True, download=False,
                                         transform=transformations)
    elif dataset_name == 'cifar100':
        train_dataset = datasets.CIFAR100(traindir, train=True, download=False,
                                          transform=transformations)
    elif 'wcs_lms' in dataset_name:
        train_dataset = datasets.DatasetFolder(traindir, npy_data_loader,
                                               ('.npy', ), transformations)
    elif 'wcs_jpg' in dataset_name:
        train_dataset = datasets.ImageFolder(traindir, transformations,
                                             loader=pil2numpy_loader)
    else:
        sys.exit('Dataset %s is not supported.' % dataset_name)
    return train_dataset
def prepare_det_data_loaders(trans, folder_path, ext_list, batch_size,
                             shuffle_tag, kwargs, sampler=None):
    return torch.utils.data.DataLoader(
        datasets.DatasetFolder(folder_path, data_loader, ext_list,
                               transform=trans),
        batch_size=batch_size,
        shuffle=shuffle_tag,
        sampler=sampler,
        # worker_init_fn must be a callable taking the worker id; seed each
        # worker deterministically.
        worker_init_fn=lambda _: torch.manual_seed(0),
        **kwargs)
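# data_loader (and the self_data_loader used in the snippets below) is an
# assumed helper that reads one serialized sample per file. A plausible
# sketch, assuming each '.bin' file was written with torch.save; this is a
# guess at the format, not the original definition:
import torch

def data_loader(path):
    return torch.load(path)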
def get_dataset(name, data_dir, size=64, lsun_categories=None,
                load_in_mem=False):
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
    ])
    data_dir = os.path.expanduser(data_dir)
    if name == 'image':
        dataset = datasets.ImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'hdf5':
        from TOOLS.make_hdf5 import Dataset_HDF5
        # HDF5 arrays arrive channel-first; transpose to HWC before ToTensor.
        transform = transforms.Compose([
            transforms.Lambda(lambda x: x.transpose(1, 2, 0)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
        ])
        dataset = Dataset_HDF5(root=data_dir, transform=transform,
                               load_in_mem=load_in_mem)
        nlabels = len(dataset.classes)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ('npy', ))
        nlabels = len(dataset.classes)
    elif name == 'cifar10':
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        dataset = datasets.CIFAR10(root=data_dir, train=True, download=True,
                                   transform=transform)
        nlabels = 10
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
        nlabels = len(dataset.classes)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir, transform,
                                     target_transform=(lambda t: 0))
        nlabels = 1
    else:
        raise NotImplementedError
    return dataset, nlabels
def load_data(self):
    # load benign mnist dataset
    trans = transforms.Compose([transforms.ToTensor()])
    folder_path = os.path.join(definitions.TROJAN_DIR, 'benign_mnist')
    log_str = 'Data Folder -> %s' % folder_path
    print(log_str)
    logger.debug(log_str)

    ## load test dataset; the same data is read twice, presumably so the
    ## benign and trojan dicts can later be modified independently
    benign_test_loader = torch.utils.data.DataLoader(
        datasets.DatasetFolder(os.path.join(folder_path, 'test'),
                               self_data_loader, ('bin', ), transform=trans),
        batch_size=self.args.test_batch_size,
        shuffle=False)
    self.benign_test_dict = data_loader2dict(benign_test_loader)
    benign_test_loader = torch.utils.data.DataLoader(
        datasets.DatasetFolder(os.path.join(folder_path, 'test'),
                               self_data_loader, ('bin', ), transform=trans),
        batch_size=self.args.test_batch_size,
        shuffle=False)
    self.trojan_test_dict = data_loader2dict(benign_test_loader)

    ## load train dataset
    benign_train_loader = torch.utils.data.DataLoader(
        datasets.DatasetFolder(os.path.join(folder_path, 'train'),
                               self_data_loader, ('bin', ), transform=trans),
        batch_size=self.args.batch_size,
        shuffle=True,
        worker_init_fn=lambda _: torch.manual_seed(0))
    self.benign_train_dict = data_loader2dict(benign_train_loader)
    benign_train_loader = torch.utils.data.DataLoader(
        datasets.DatasetFolder(os.path.join(folder_path, 'train'),
                               self_data_loader, ('bin', ), transform=trans),
        batch_size=self.args.batch_size,
        shuffle=True,
        worker_init_fn=lambda _: torch.manual_seed(0))
    self.trojan_train_dict = data_loader2dict(benign_train_loader)

    print('batch_size: ', self.args.batch_size)
    print('#batch: ', len(self.trojan_train_dict))
def generate_dataset_cv(data_path, loader, extensions, train_transform,
                        test_transform):
    # train_path = os.path.join(data_path, 'ISBI2020_prep_Mix_sigma10')
    # train_path = os.path.join(data_path, 'ISBI2020_prep_Train_sigma10')
    train_path = os.path.join(data_path, 'Kaggle2019_prep_train_sigma10')
    train_dataset = datasets.DatasetFolder(train_path, loader, extensions,
                                           transform=train_transform)
    return train_dataset
def make_dataset(path):
    transform = Compose([
        Resize((64, 64)),
        ToTensor(),
        Normalize(mean=[0.5, 0.5, 0.5], std=[0.55, 0.55, 0.55])
    ])
    dataset = dset.DatasetFolder(path,
                                 default_loader,
                                 extensions=('jpg', ),
                                 transform=transform)
    return dataset
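# default_loader here is torchvision's stock PIL-based image loader; the
# snippet assumes it was imported, e.g. (the call below uses an illustrative
# path):
from torchvision.datasets.folder import default_loader

dataset = make_dataset('./data/images')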
def cal_train_data(self):
    dataset = datasets.DatasetFolder(
        root=self.data_path,
        loader=lambda x: torch.from_numpy(np.load(x)),
        extensions=('.npy', ),
        transform=MySimCLRTrainDataTransform(self.img_size))
    #dataset = datasets.ImageFolder(root=self.data_path,
    #                               transform=MySimCLRTrainDataTransform(self.img_size))
    # Size of the training split after holding out test_ratio for evaluation.
    num_train = len(dataset)
    split = int(np.floor(self.test_ratio * num_train))
    return num_train - split
def generate_dataset(data_path, loader, extensions, train_transform,
                     test_transform):
    train_path = os.path.join(data_path, 'ISBI2020_prep_Mix_sigma10')
    # test_path = os.path.join(data_path, 'ISBI2020_prep_Test_sigma10')
    # val_path = os.path.join(data_path, 'val')
    train_dataset = datasets.DatasetFolder(train_path, loader, extensions,
                                           transform=train_transform)
    # test_dataset = datasets.DatasetFolder(test_path, loader, extensions, transform=test_transform)
    # val_dataset = datasets.DatasetFolder(val_path, loader, extensions, transform=test_transform)
    return train_dataset
def jsonloader(data_dir, img_size, batchSize):
    # Preprocessing: Resize, brightness corrections
    # json_loader is a hypothetical stand-in: DatasetFolder requires a
    # callable loader plus an extensions (or is_valid_file) filter; adjust it
    # to the real file format.
    import json

    def json_loader(path):
        with open(path) as f:
            return json.load(f)

    dataset = dset.DatasetFolder(root=data_dir,
                                 loader=json_loader,
                                 extensions=('.json', ),
                                 transform=transforms.Compose([
                                     transforms.Resize(img_size),
                                     transforms.ToTensor(),
                                 ]))
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batchSize,
                                             shuffle=True)
    print('Data size:', len(dataset), 'json')
    return dataloader
def get_dataset(name, data_dir, size=64, lsun_categories=None, config=None):
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        transforms.Lambda(lambda x: x + 1. / 128 * torch.rand(x.size())),
    ])
    if name == "MoG":
        dataset = MixtureOfGaussianDataset(config)
        nlabels = 1
    elif name.lower() == "celeba":
        imgs = np.load("/home/LargeData/celebA_64x64.npy")
        labels = np.zeros([imgs.shape[0]]).astype(np.int64)
        dataset = NumpyImageDataset(imgs, labels, transform)
        nlabels = 1
    elif name == 'image':
        dataset = datasets.ImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ('npy', ))
        nlabels = len(dataset.classes)
    elif name == 'cifar10':
        dataset = datasets.CIFAR10(root=data_dir, train=True, download=True,
                                   transform=transform)
        nlabels = 10
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
        nlabels = len(dataset.classes)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir, transform,
                                     target_transform=(lambda t: 0))
        nlabels = 1
    else:
        raise NotImplementedError
    return dataset, nlabels
def val_dataloader(self):
    dataset = datasets.DatasetFolder(
        root=self.data_path,
        loader=lambda x: torch.from_numpy(np.load(x)),
        extensions=('.npy', ),
        transform=MySimCLREvalDataTransform(self.img_size))
    # Hold out the first test_ratio fraction of indices for validation.
    num_train = len(dataset)
    indices = list(range(num_train))
    split = int(np.floor(self.test_ratio * num_train))
    val_idx = indices[:split]
    val_sampler = SubsetRandomSampler(val_idx)
    self.num_val_imgs = len(val_idx)
    self.sample_dataloader = DataLoader(dataset,
                                        num_workers=50,
                                        batch_size=144,
                                        sampler=val_sampler,
                                        drop_last=True)
    return self.sample_dataloader
def load_observations(
    game: GymGame,
    random_split: bool,
    observations_dir,
    batch_size=32,
    drop_z_values=True,
    validation_percentage=0.1,
):
    """
    Load observations from disk and return training and validation dataloaders.

    Observations are loaded from *observations_dir*. drop_z_values drops the
    z and next_z parameters from the dataset. random_split controls whether
    the dataset is split randomly into training/validation subsets or not.
    """
    def load_and_transform(filename):
        obs_dict = Observation.load_as_dict(filename)
        obs_dict["screen"] = transform(obs_dict["screen"])
        if drop_z_values:
            del obs_dict["z"]
            del obs_dict["next_z"]
        return obs_dict

    observations_dir /= game.key
    dataset = datasets.DatasetFolder(
        root=str(observations_dir),
        loader=load_and_transform,
        extensions=Observation.FILE_EXTENSION,
    )

    dataset_size = len(dataset)
    validation_size = int(dataset_size * validation_percentage)
    training_size = dataset_size - validation_size
    if random_split:
        validation_ds, training_ds = torch.utils.data.dataset.random_split(
            dataset, [validation_size, training_size])
    else:
        validation_ds = Subset(dataset, range(0, validation_size))
        training_ds = Subset(dataset, range(validation_size, dataset_size))
    validation_dl = DataLoader(validation_ds, batch_size=batch_size)
    training_dl = DataLoader(training_ds, batch_size=batch_size)
    return training_dl, validation_dl
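# load_and_transform closes over a module-level `transform` that is not
# shown. A hedged guess at its shape, assuming screens arrive as HxWxC uint8
# arrays; the target size is illustrative:
from torchvision import transforms

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),  # float tensor in [0, 1], CHW
])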
def predict_samples(self, samples_dir):
    if not os.path.exists(self.saved_model):
        print('No saved model in {}'.format(self.saved_model))
        exit(1)
    self.model.load_state_dict(
        torch.load(self.saved_model, map_location={'cuda:0': 'cpu'}))
    self.model.eval()
    trans = transforms.Compose([transforms.ToTensor()])
    test_loader = torch.utils.data.DataLoader(
        datasets.DatasetFolder(samples_dir, data_loader, ('bin', ),
                               transform=trans),
        batch_size=1)
    # Disable gradient tracking for inference.
    with torch.no_grad():
        for data, _ in test_loader:
            output = self.model(data)
            pred = output.data.max(1, keepdim=True)[1]
            logger.debug('Input: {} - Pred : {}'.format(data, pred))
def predict_tandem(self, samples_dir, nn2):
    self.model.load_state_dict(
        torch.load(self.saved_model, map_location={'cuda:0': 'cpu'}))
    self.model.eval()
    trans = transforms.Compose([transforms.ToTensor()])
    test_loader = torch.utils.data.DataLoader(
        datasets.DatasetFolder(samples_dir, data_loader, ('bin', ),
                               transform=trans),
        batch_size=1)
    nn2.model.load_state_dict(
        torch.load(nn2.saved_model, map_location={'cuda:0': 'cpu'}))
    nn2.model.eval()
    # Disable gradient tracking for inference.
    with torch.no_grad():
        for data, _ in test_loader:
            output = self.model(data)
            pred1 = output.data.max(1, keepdim=True)[1]
            output = nn2.model(data)
            pred2 = output.data.max(1, keepdim=True)[1]
            logger.debug('Input: {} - Pred 1: {} - pred 2: {}'.format(
                data, pred1, pred2))