def __init__(self, path="", use_train=False, use_test=False, **kwargs):
    """Load the CIFAR10 train and/or test split and log a statistics table.

    Args:
        path: Directory under which the ``cifar10`` folder lives; the data
            is downloaded there on first use.
        use_train: When True, populate ``self.train`` with the training split.
        use_test: When True, populate ``self.val`` with the test split.
        **kwargs: Ignored; accepted for signature compatibility with
            sibling dataset wrappers.
    """
    super().__init__()
    self.dataset_dir = osp.join(path, 'cifar10')
    self._check_before_run()

    def _load_split(store, is_train, row_name, banner):
        # Shared loader for both splits: fills the given store dict
        # (self.train or self.val) and logs the statistics banner.
        # transform=None keeps raw PIL images.
        store['handle'] = CIFAR10(root=self.dataset_dir, train=is_train,
                                  transform=None, download=True)
        store['n_samples'] = len(store['handle'])
        logger.info("=> CIFAR10 {} loaded".format(banner))
        logger.info("Dataset statistics:")
        logger.info(" ------------------------------")
        logger.info(" subset | # class | # images")
        logger.info(" ------------------------------")
        # CIFAR10 always has exactly 10 classes.
        logger.info(" {} | {:7d} | {:8d}".format(row_name, 10,
                                                 store['n_samples']))
        logger.info(" ------------------------------")

    if use_train:
        _load_split(self.train, True, "train", "TRAIN")
    if use_test:
        _load_split(self.val, False, "val", "VAL")
def cifarloader(root):
    """Build CIFAR-10 train/test loaders with the standard augmentation.

    Training images get a padded random crop plus a horizontal flip; both
    splits are normalized with the usual CIFAR-10 channel statistics.

    Args:
        root: Dataset root directory (downloaded there if absent).

    Returns:
        (train_loader, test_loader) tuple.
    """
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))
    augmented = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    plain = transforms.Compose([transforms.ToTensor(), normalize])

    train_loader = Loader(
        CIFAR10(root=root, train=True, download=True, transform=augmented),
        batch_size=64, shuffle=True, num_workers=0)
    test_loader = Loader(
        CIFAR10(root=root, train=False, download=True, transform=plain),
        batch_size=100, shuffle=False, num_workers=0)
    return train_loader, test_loader
def data_load(path, batch_size):
    """Create CIFAR-10 train/test DataLoaders.

    Args:
        path: Dataset root directory (downloaded there if absent).
        batch_size: Training batch size; evaluation uses 8x this size.

    Returns:
        (train_loader, test_loader) tuple.
    """
    # Standard CIFAR-10 channel statistics -- be careful to use the same
    # normalization for both splits.
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                     (0.2023, 0.1994, 0.2010))
    transform_train = transforms.Compose(
        [transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    train_loader = DataLoader(
        CIFAR10(path, train=True, download=True, transform=transform_train),
        batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    print('train set: {}'.format(len(train_loader.dataset)))

    test_loader = DataLoader(
        CIFAR10(path, train=False, download=True, transform=transform_test),
        batch_size=batch_size * 8, shuffle=False, num_workers=0,
        pin_memory=True)
    print('val set: {}'.format(len(test_loader.dataset)))

    # Fix: removed the unreachable trailing `pass` after the return and the
    # dead commented-out iterator code (it used the removed `.next()` API).
    return train_loader, test_loader
def get_basic_train_test_loaders(path, batch_size, num_workers, device):
    """Build plain CIFAR-10 loaders over an already-downloaded dataset.

    Args:
        path: Root directory that already contains CIFAR-10 (download=False).
        batch_size: Training batch size; the test loader uses 4x this.
        num_workers: DataLoader worker count.
        device: Device string; pinning is enabled when it contains "cuda".

    Returns:
        (train_loader, test_loader) tuple.
    """
    # NOTE(review): these are the ImageNet statistics, not CIFAR-10's, and
    # the training loader is not shuffled -- confirm both are intentional.
    stats = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_set = TransformedDataset(
        CIFAR10(root=path, train=True, download=False),
        Compose([RandomHorizontalFlip(), ToTensor(), Normalize(**stats)]))
    test_set = TransformedDataset(
        CIFAR10(root=path, train=False, download=False),
        Compose([ToTensor(), Normalize(**stats)]))

    pin = "cuda" in device
    train_loader = DataLoader(train_set, batch_size=batch_size,
                              num_workers=num_workers, pin_memory=pin,
                              drop_last=True)
    test_loader = DataLoader(test_set, batch_size=batch_size * 4,
                             num_workers=num_workers, pin_memory=pin,
                             drop_last=False)
    return train_loader, test_loader
def __init__(self, root):
    """Build balanced train/test subsets (10 images per class) on CIFAR-10.

    Args:
        root: Dataset root directory (downloaded there if absent).
    """
    subset_opts = dict(images_per_cls=10, use_all=False)
    self.trainset = self.DataSet(
        CIFAR10(root=root, train=True, download=True),
        50000, mode='train', **subset_opts)
    self.testset = self.DataSet(
        CIFAR10(root=root, train=False, download=True),
        10000, mode='test', **subset_opts)
def load_cifar10(path=os.path.join(BASE_PATH, 'cifar10')):
    """Return the CIFAR10 train and test datasets.

    The dataset is downloaded if it is not already present.

    Args:
        path (str): Directory where CIFAR10 is found or downloaded.

    Returns:
        Tuple (training dataset, test dataset).
    """
    return (CIFAR10(path, train=True, download=True),
            CIFAR10(path, train=False, download=True))
def main():
    """Parse CLI arguments and run Deep-InfoMax training on CIFAR-10."""
    parser = argparse.ArgumentParser(description='Deep-Infomax pytorch')
    parser.add_argument('--batch_size', default=5, type=int, help='batch_size')
    parser.add_argument('--epochs', default=10, type=int, help='epochs')
    parser.add_argument('--lr', default=1e-3, type=float, help='learning rate')
    args = parser.parse_args()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # CIFAR-10 images are 3x32x32.  drop_last keeps every batch the same
    # (even) size and shuffle must stay True for the InfoMax objective.
    cifar_10_train_dt = CIFAR10('data', train=True, download=True,
                                transform=ToTensor())
    cifar_10_train_l = DataLoader(cifar_10_train_dt,
                                  batch_size=args.batch_size,
                                  shuffle=True, drop_last=True,
                                  pin_memory=torch.cuda.is_available())

    wrn = wide_resnet_34_10().to(device)
    loss_fn = DeepInfoMaxLoss().to(device)
    optim = Adam(wrn.parameters(), lr=args.lr)
    loss_optim = Adam(loss_fn.parameters(), lr=args.lr)

    train_dim(args, cifar_10_train_dt, cifar_10_train_l, device,
              loss_fn, loss_optim, optim, wrn)
def get_supervised_trainset(root, num_train_samples_per_class=25, download=True):
    """Return a class-balanced labelled subset of the CIFAR-10 train split.

    Args:
        root: Dataset root directory.
        num_train_samples_per_class: Samples kept per class.  None returns
            the full training set; 25 dispatches to the pre-baked
            250-sample split helper.
        download: Forwarded to torchvision's downloader.

    Returns:
        A ``Subset`` (or the full ``CIFAR10`` dataset when the per-class
        count is None).
    """
    if num_train_samples_per_class == 25:
        return get_supervised_trainset_0_250(root, download=download)

    num_classes = 10
    full_train_dataset = CIFAR10(root, train=True, download=download)
    if num_train_samples_per_class is None:
        return full_train_dataset

    # Fix: read labels directly instead of indexing the dataset, which
    # decodes every PIL image only to discard it.  Older torchvision
    # lacks `.targets`, so fall back to item access there.
    labels = getattr(full_train_dataset, 'targets', None)

    target_total = num_classes * num_train_samples_per_class
    supervised_train_indices = []
    counter = [0] * num_classes
    # Seeding the *global* NumPy RNG keyed on the per-class count keeps the
    # subset reproducible, exactly as the original did (side effect kept).
    np.random.seed(num_train_samples_per_class)
    random_indices = np.random.permutation(len(full_train_dataset))
    for i in random_indices:
        if len(supervised_train_indices) >= target_total:
            break
        label = labels[i] if labels is not None else full_train_dataset[i][1]
        if counter[label] < num_train_samples_per_class:
            counter[label] += 1
            supervised_train_indices.append(i)
    return Subset(full_train_dataset, supervised_train_indices)
def __init__(
    self,
    split: Optional[str],
    batchsize_per_replica: int,
    shuffle: bool,
    transform: Optional[Union[ClassyTransform, Callable]],
    num_samples: Optional[int],
    root: str,
    download: Optional[bool] = None,
):
    """Construct a CIFAR dataset wrapper for the configured _CIFAR_TYPE.

    Args:
        split: "train" selects the training split; anything else the test
            split.
        batchsize_per_replica: Per-replica batch size (forwarded to base).
        shuffle: Whether to shuffle (forwarded to base).
        transform: Optional sample transform (forwarded to base).
        num_samples: Optional cap on samples (forwarded to base).
        root: Dataset root directory.
        download: Whether to download if missing.  Fix: annotated as
            Optional[bool] -- the default was ``None`` but typed ``bool``.
    """
    assert self._CIFAR_TYPE in [
        "cifar10",
        "cifar100",
    ], "CIFARDataset must be subclassed and a valid _CIFAR_TYPE provided"
    if self._CIFAR_TYPE == "cifar10":
        dataset = CIFAR10(root=root, train=(split == "train"), download=download)
    else:
        # The assert above guarantees this branch is "cifar100"; the
        # original re-tested with an independent `if`.
        dataset = CIFAR100(root=root, train=(split == "train"), download=download)
    super().__init__(dataset, batchsize_per_replica, shuffle, transform, num_samples)
def download10():
    """Download the CIFAR10 dataset into IMG_DIR (created if missing).

    Fix: the docstring previously said "cifar100" although this function
    downloads CIFAR-10; the exists()/mkdir pair is replaced by the
    race-free ``os.makedirs(..., exist_ok=True)``.
    """
    os.makedirs(IMG_DIR, exist_ok=True)
    cifar = CIFAR10(root=IMG_DIR, download=True)
    # Sanity check: a successfully downloaded split is never empty.
    assert len(cifar) > 0
    print("download complete")
def main(use_rev_backward=True):
    """Train NewNet on CIFAR-10 held as in-memory numpy arrays.

    Args:
        use_rev_backward: Forwarded to the NewNet constructor.
    """
    use_train_data_for_test = False

    # Build the dataset: raw numpy arrays, no transforms applied.
    dataset_path = r'D:\DeepLearningProject\datasets\cifar10'
    train_dataset = CIFAR10(dataset_path, True)
    x_train, y_train = train_dataset.data, train_dataset.targets
    val_dataset = CIFAR10(dataset_path, False)
    x_val, y_val = val_dataset.data, val_dataset.targets
    y_train = np.asarray(y_train)
    y_val = np.asarray(y_val)
    x_train = x_train[:]
    y_train = y_train[:]
    # The dataset objects are no longer needed once the arrays are held.
    del train_dataset, val_dataset
    if use_train_data_for_test:
        x_val, y_val = x_train, y_train

    # Training settings.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    model = NewNet(use_rev_backward).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0)
    # Resume from a previous checkpoint if one is present and compatible.
    try:
        model.load_state_dict(torch.load("cifar10_cnn.pt"))
    except (FileNotFoundError, RuntimeError):
        print('Not found save model')

    batch_size = 200
    for epoch in range(1000):
        train(model, device, optimizer, x_train, y_train, epoch, batch_size)
        test(model, device, x_val, y_val, batch_size)
        # Checkpoint after every epoch.
        torch.save(model.state_dict(), "cifar10_cnn.pt")
def main():
    """Train a WideResNet on CIFAR-10 via the fastai-style array pipeline.

    Fix: the original read the long-removed ``train_data``/``train_labels``
    and ``test_data``/``test_labels`` attributes, which crash on modern
    torchvision; both attribute layouts are now supported (same fallback
    pattern used elsewhere in this file).
    """
    os.makedirs(PATH, exist_ok=True)
    trn_ds = CIFAR10(PATH, train=True, download=True)
    tst_ds = CIFAR10(PATH, train=False, download=True)

    # torchvision renamed train_data/test_data -> data and
    # train_labels/test_labels -> targets; support both layouts.
    trn_images = trn_ds.train_data if hasattr(trn_ds, "train_data") else trn_ds.data
    trn_labels = trn_ds.train_labels if hasattr(trn_ds, "train_labels") else trn_ds.targets
    tst_images = tst_ds.test_data if hasattr(tst_ds, "test_data") else tst_ds.data
    tst_labels = tst_ds.test_labels if hasattr(tst_ds, "test_labels") else tst_ds.targets

    # Scale uint8 pixels into [0, 1]; normalization happens in the tfms.
    trn = trn_images.astype('float32') / 255, np.array(trn_labels)
    tst = tst_images.astype('float32') / 255, np.array(tst_labels)

    sz, bs = 32, 128
    stats = (np.array([0.4914, 0.48216, 0.44653]),
             np.array([0.24703, 0.24349, 0.26159]))
    aug_tfms = [RandomFlip(), Cutout(1, 16)]
    tfms = tfms_from_stats(stats, sz, aug_tfms=aug_tfms, pad=4)
    data = ImageClassifierData.from_arrays(PATH, trn, tst, bs=bs, tfms=tfms)
    wrn = WideResNet(n_grps=3, N=4, k=10)
    learn = ConvLearner.from_model_data(wrn, data)
    train(learn)
def get_iterator(mode):
    """Return a torchnet parallel iterator over a CIFAR-10 split.

    Args:
        mode: Truthy selects the training split (and shuffling); falsy the
            test split.

    Fix: the original read the long-removed ``train_data``/``test_data``
    and ``*_labels`` attributes, which crash on modern torchvision; the
    new ``data``/``targets`` names are preferred with a fallback.
    """
    dataset = CIFAR10(root=DATA_DIR, download=True, train=mode)
    if hasattr(dataset, 'data'):
        data, labels = dataset.data, dataset.targets
    else:
        data = getattr(dataset, 'train_data' if mode else 'test_data')
        labels = getattr(dataset, 'train_labels' if mode else 'test_labels')
    tensor_dataset = tnt.dataset.TensorDataset([data, labels])
    return tensor_dataset.parallel(batch_size=BATCH_SIZE, num_workers=4,
                                   shuffle=mode)
def cifar10(dataroot, workers, batch_size):
    """Shuffled CIFAR-10 training loader using the project ImageTransform.

    Args:
        dataroot: Dataset root directory (downloaded there if absent).
        workers: DataLoader worker count.
        batch_size: Batch size; incomplete final batches are dropped.

    Returns:
        A DataLoader over the (default, training) split.
    """
    dataset = CIFAR10(root=dataroot, download=True, transform=ImageTransform())
    return data.DataLoader(dataset,
                           batch_size=batch_size,
                           shuffle=True,
                           num_workers=workers,
                           drop_last=True)
def get_cifar10_loader(root, train, download, transform, batch_size=4):
    """DataLoader over one CIFAR-10 split; the training split is shuffled.

    Args:
        root: Dataset root directory.
        train: Split selector; also controls shuffling.
        download: Whether to download the data if missing.
        transform: Per-image transform.
        batch_size: Batch size (default 4).
    """
    split = CIFAR10(root=root, train=train, download=download,
                    transform=transform)
    return DataLoader(split, batch_size=batch_size, shuffle=train,
                      num_workers=2)
def test_three_channel_grey():
    """Visual check: grey -> 3-channel-grey round trip on one CIFAR image."""
    # NOTE(review): unlike the sibling tests, no download=True here -- the
    # data is assumed to already exist at this path.
    data = CIFAR10('/home/duane/PycharmProjects/iic/data')
    original, _label = data[100]
    grey = Grayscale()(original)
    replicated = three_channel_grey(grey)
    show(original, grey, replicated)
def test_gradient_color():
    """Visual check: iterated grey gradients of a color-gradient image."""
    data = CIFAR10('/home/duane/PycharmProjects/iic/data', download=True)
    original, _label = data[5880]
    grad_color = gradient(original)
    # One gradient pass on the greyscale version, then five more.
    grad = gradient_grey(Grayscale()(grad_color))
    for _ in range(5):
        grad = gradient_grey(grad)
    show(original, grad_color, grad)
def test_connectedness_color():
    """Visual check of the connectedness transform on one CIFAR-10 image.

    Fix: removed the unused ``image_np``/``image_np_con`` locals (the
    ``np.asarray`` results were never read and have no side effects).
    """
    to_grey = Grayscale()
    data = CIFAR10('/home/duane/PycharmProjects/iic/data')
    orig, label = data[100]
    grey = to_grey(gradient(orig))
    # Threshold 20.0, ratio 0.5 -- see connectedness() for the semantics.
    connected = connectedness(grey, 20.0, 0.5)
    show(orig, grey, connected)
def init_cifar_dataloader(root, batchSize):
    """Build normalized CIFAR-10 loaders (flip-augmented training split).

    Args:
        root: Dataset root directory (downloaded there if absent).
        batchSize: Training batch size; evaluation uses 8x this size.

    Returns:
        (train_loader, test_loader) tuple.
    """
    channel_stats = transforms.Normalize((0.4914, 0.4822, 0.4465),
                                         (0.2023, 0.1994, 0.2010))
    augmented = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        channel_stats,
    ])
    plain = transforms.Compose([transforms.ToTensor(), channel_stats])

    train_loader = DataLoader(
        CIFAR10(root, train=True, download=True, transform=augmented),
        batch_size=batchSize, shuffle=True, num_workers=4, pin_memory=True)
    print(f'train set: {len(train_loader.dataset)}')

    test_loader = DataLoader(
        CIFAR10(root, train=False, download=True, transform=plain),
        batch_size=batchSize * 8, shuffle=False, num_workers=4,
        pin_memory=True)
    print(f'val set: {len(test_loader.dataset)}')
    return train_loader, test_loader
def test_grad_blur_color():
    """Visual check: repeated gradient+blur passes on a grey CIFAR image."""
    data = CIFAR10('/home/duane/PycharmProjects/iic/data', download=True)
    original, _label = data[5810]
    original = to_grey(original)
    blurred = blur(gradient_grey(original))
    result = blur(gradient_grey(blurred))
    # One extra pass, matching the original's single-iteration loop.
    result = blur(gradient_grey(result))
    result = three_channel_grey(result)
    result = F.adjust_brightness(result, 2.0)
    result = to_grey(result)
    show(original, blurred, result)
def get_dataset(mode):
    """Return the MNIST or CIFAR-10 split selected by the global ``args``.

    Args:
        mode: "train" or "val" select the training split; anything else the
            test split.

    Returns:
        A transformed torchvision dataset.

    Raises:
        ValueError: if ``args.dataset`` is neither "mnist" nor "cifar".
            (Fix: the original fell through with ``dataset`` unbound and
            crashed with an UnboundLocalError.)
    """
    is_train = mode == "train" or mode == "val"
    if args.dataset == "mnist":
        transform = transforms.Compose([transforms.ToTensor()])
        dataset = MNIST(root='./data', download=True, train=is_train,
                        transform=transform)
    elif args.dataset == "cifar":
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        dataset = CIFAR10(root="/data/lisa/data/cifar10", download=True,
                          train=is_train, transform=transform)
    else:
        raise ValueError('Unsupported dataset: {}'.format(args.dataset))
    return dataset
def main():
    """Compare FastCIFAR10 against two plain CIFAR10 instances on sample 0."""
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    pipeline = transforms.Compose([transforms.ToTensor(), normalize])

    fast_cifar10 = FastCIFAR10('../data', train=True, transform=pipeline,
                               final_shape=(50000, 3, 32, 32))
    fast_cifar10.reset()
    cifar10 = CIFAR10('../data', train=True, transform=pipeline)
    cifar10_1 = CIFAR10('../data', train=True, transform=pipeline)

    f1, _ = fast_cifar10[0]
    c1, _ = cifar10[0]
    c2, _ = cifar10_1[0]
    # f1 vs c1 checks the fast path against torchvision; c1 vs c2 checks
    # that the plain pipeline itself is deterministic.
    print('f1,c1 Equals', np.array_equal(f1, c1))
    print('c1,c2 Equals', np.array_equal(c1, c2))
def get_fast_train_test_loaders(path, batch_size, num_workers, device,
                                train_transforms=default_train_transforms):
    """Fast CIFAR-10 loaders: everything preprocessed to arrays up front.

    Assumes the dataset is already on disk (download=False).

    Args:
        path: Dataset root directory.
        batch_size: Batch size for both loaders.
        num_workers: Worker count for the training loader only.
        device: Device string; pinning is enabled when it contains "cuda".
        train_transforms: Extra per-sample training transforms.

    Returns:
        (train_loader, test_loader) tuple.
    """
    def _arrays(ds, data_attr, label_attr):
        # Older torchvision exposed train_data/test_data and *_labels;
        # newer versions use data/targets.  Support both layouts.
        images = getattr(ds, data_attr) if hasattr(ds, data_attr) else ds.data
        labels = getattr(ds, label_attr) if hasattr(ds, label_attr) else ds.targets
        return images, labels

    train_raw = CIFAR10(root=path, train=True, download=False)
    test_raw = CIFAR10(root=path, train=False, download=False)

    print('Preprocessing training data')
    train_images, train_labels = _arrays(train_raw, "train_data", "train_labels")
    train_set = list(zip(transpose(normalise(pad(train_images, 4))),
                         train_labels))

    print('Preprocessing test data')
    test_images, test_labels = _arrays(test_raw, "test_data", "test_labels")
    test_set = list(zip(transpose(normalise(test_images)), test_labels))

    train_set = Transform(train_set, train_transforms)
    pin = "cuda" in device
    train_loader = DataLoader(train_set, batch_size=batch_size,
                              num_workers=num_workers, shuffle=True,
                              pin_memory=pin, drop_last=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, num_workers=0,
                             shuffle=False, pin_memory=pin, drop_last=False)
    return train_loader, test_loader
def load_cifar10(path, download=False):
    """CIFAR-10 datasets; training images get a random +/-25 deg rotation.

    Args:
        path: Dataset root directory.
        download: Whether to download the data if missing.

    Returns:
        (train_dataset, test_dataset) tuple.
    """
    # NOTE(review): these are the ImageNet normalization statistics, not
    # CIFAR-10's -- confirm that this is intentional.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_tf = transforms.Compose([
        transforms.RandomRotation(degrees=25),
        transforms.ToTensor(),
        normalize,
    ])
    test_tf = transforms.Compose([transforms.ToTensor(), normalize])

    train_dataset = CIFAR10(path, train=True, download=download,
                            transform=train_tf)
    test_dataset = CIFAR10(path, train=False, download=download,
                           transform=test_tf)
    return train_dataset, test_dataset
def get_test_loader(
    root, transforms=test_transforms, download=True, **dataloader_kwargs
):
    """Distributed-aware CIFAR-10 test loader (fixed order, all samples).

    Each sample is mapped to ``{"image": transforms(img), "target": label}``.

    Args:
        root: Dataset root directory.
        transforms: Image transform applied per sample.
        download: Whether to download the data if missing.
        **dataloader_kwargs: Extra loader options; pin_memory, drop_last
            and shuffle are forced to evaluation-appropriate values.
    """
    dataset = TransformedDataset(
        CIFAR10(root, train=False, download=download),
        transforms=lambda dp: {"image": transforms(dp[0]), "target": dp[1]},
    )
    # Evaluation semantics: no shuffling, keep the (possibly short) last
    # batch, and pin only when running on CUDA.
    dataloader_kwargs["pin_memory"] = "cuda" in idist.device().type
    dataloader_kwargs["drop_last"] = False
    dataloader_kwargs["shuffle"] = False
    return idist.auto_dataloader(dataset, **dataloader_kwargs)
def build_data_loader(cfg, is_train, is_distributed=False):
    """Build a CIFAR-10/100 DataLoader from the config.

    Args:
        cfg: Config node providing INPUT.*, TRAIN/TEST.BATCH_SIZE and
            DATALOADER.NUM_WORKERS.
        is_train: Selects the training split, augmentation and batch size.
        is_distributed: Forwarded to the sampler factory.

    Returns:
        A DataLoader, or ``{}`` when cfg.INPUT.DATASET is unsupported
        (kept for backward compatibility with existing callers).
    """
    DATASET = cfg.INPUT.DATASET
    data_path = cfg.INPUT.DATASET_ROOT
    mean, std = cfg.INPUT.MEAN, cfg.INPUT.STD
    # torchvision's ToTensor already divides by 255, scaling into [0, 1].
    if is_train:
        data_transforms = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ])
        batch_size = cfg.TRAIN.BATCH_SIZE
    else:
        # Fix: evaluation must be deterministic -- the RandomHorizontalFlip
        # that had been copy-pasted into this branch has been removed.
        data_transforms = transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ])
        batch_size = cfg.TEST.BATCH_SIZE

    if DATASET == 'cifar10':
        dataset = CIFAR10(root=data_path, train=is_train,
                          transform=data_transforms)
    elif DATASET == 'cifar100':
        dataset = CIFAR100(root=data_path, train=is_train,
                           transform=data_transforms)
    else:
        print('Not support dataset: ', DATASET)
        return {}

    sampler = make_data_sampler(dataset, shuffle=is_train,
                                is_distributed=is_distributed)
    return DataLoader(dataset, batch_size, sampler=sampler,
                      num_workers=cfg.DATALOADER.NUM_WORKERS)
def __init__(self, root, train=True, transform=None, target_transform=None,
             download=False, isgpu=True):
    """Wrap CIFAR10 and expose its raw arrays as ``.data`` / ``.labels``.

    Args:
        root: Dataset root directory.
        train: Split selector.
        transform: Stored for later per-sample use.
        target_transform: Stored for later per-target use.
        download: Whether to download the data if missing.
        isgpu: Unused; kept for interface compatibility with callers.

    Fix: the original read the long-removed ``train_data``/``test_data``
    and ``*_labels`` attributes, which crash on modern torchvision; the
    new ``data``/``targets`` names are preferred with a fallback.
    """
    dset = CIFAR10(root, train, transform, target_transform, download)
    if hasattr(dset, 'data'):
        self.data = dset.data
        self.labels = dset.targets
    elif train:
        self.data = dset.train_data
        self.labels = dset.train_labels
    else:
        self.data = dset.test_data
        self.labels = dset.test_labels
    self.transform = transform
    self.target_transform = target_transform
def get_iterator(mode):
    """torchnet parallel iterator over CIFAR-10 as float32 NCHW arrays.

    Args:
        mode: Truthy selects the training split (and shuffling); falsy the
            test split.
    """
    dataset = CIFAR10(root='./data', download=True, train=mode)
    # HWC uint8 images -> NCHW float32 for the downstream model.
    images = np.array(np.transpose(dataset.data, [0, 3, 1, 2]),
                      dtype=np.float32)
    tensor_dataset = tnt.dataset.TensorDataset([images, dataset.targets])
    return tensor_dataset.parallel(batch_size=BATCH_SIZE, num_workers=4,
                                   shuffle=mode)
# Script preamble: load CIFAR-10 and a pre-trained Deep-InfoMax encoder
# checkpoint.  NOTE(review): this excerpt appears to continue beyond this
# view (the trailing comment announces work that is not shown here).
from pathlib import Path
from torchvision.datasets.cifar import CIFAR10
from torch.utils.data import DataLoader, Subset
from torchvision.transforms import ToTensor, ToPILImage
from tqdm import tqdm
import random
# from matplotlib import pyplot as plt
import numpy as np

# `torch` and `models` are assumed to be imported elsewhere in the real
# file -- they are used below but not imported here; TODO confirm.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 128

# image size 3, 32, 32
# batch size must be an even number
# shuffle must be True
cifar_10_train_dt = CIFAR10(r'c:\data\tv', download=True, transform=ToTensor())
#dev = Subset(cifar_10_train_dt, range(128))
cifar_10_train_l = DataLoader(cifar_10_train_dt, batch_size=batch_size, shuffle=True, drop_last=True, pin_memory=torch.cuda.is_available())

# Restore the encoder weights saved after epoch 9 of a previous run.
epoch = 9
model_path = Path(r'c:\data\deepinfomax\models\run1\encoder' + str(epoch))
encoder = models.Encoder()
encoder.load_state_dict(torch.load(str(model_path)))
encoder.to(device)
# compute the latent space for each image and store in (latent, image)
# Deep-InfoMax training entry point.  NOTE(review): this excerpt appears
# truncated -- enc_file/loss_file are computed below but the state_dict
# loading presumably follows outside this view; confirm against the full
# file before editing.
if __name__ == '__main__':
    torch.manual_seed(1);
    parser = argparse.ArgumentParser(description='DeepInfomax pytorch')
    parser.add_argument('--batch_size', default=64, type=int, help='batch_size')
    args = parser.parse_args()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    batch_size = args.batch_size

    # image size 3, 32, 32
    # batch size must be an even number
    # shuffle must be True
    cifar_10_train_dt = CIFAR10('data', download=True, train=True, transform=ToTensor())
    cifar_10_train_l = DataLoader(cifar_10_train_dt, batch_size=batch_size, shuffle=True, drop_last=True, pin_memory=torch.cuda.is_available())

    encoder = Encoder().to(device)
    # Making the objective local-only -- presumably (alpha=0, beta=1,
    # gamma=0.1) weights; verify against DeepInfoMaxLoss's signature.
    loss_fn = DeepInfoMaxLoss(0, 1, 0.1).to(device)
    encoder_optim = Adam(encoder.parameters(), lr=1e-4)
    loss_optim = Adam(loss_fn.parameters(), lr=1e-4)

    # Optionally resume from checkpoint files saved at epoch `epoch_restart`.
    epoch_restart = 20
    root = Path(r'models')
    if epoch_restart > 0 and root is not None:
        enc_file = root / Path('encoder' + str(epoch_restart) + '.wgt')
        loss_file = root / Path('loss' + str(epoch_restart) + '.wgt')