def make_dataset():
    # Small uniform noise is added to dequantize pixel values, following SN-GAN.
    def noise(x):
        return x + torch.FloatTensor(x.size()).uniform_(0, 1.0 / 128)

    if opt.dataset == "cifar10":
        trans = tfs.Compose([
            tfs.RandomCrop(opt.img_width, padding=4),
            tfs.RandomHorizontalFlip(),
            tfs.ToTensor(),
            tfs.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
            tfs.Lambda(noise)
        ])
        data = CIFAR10(root=opt.root, train=True, download=False, transform=trans)
    elif opt.dataset in ("dog_and_cat_64", "dog_and_cat_128", "imagenet"):
        # These three datasets share the same ImageFolder pipeline.
        trans = tfs.Compose([
            tfs.RandomResizedCrop(opt.img_width, scale=(0.8, 0.9), ratio=(1.0, 1.0)),
            tfs.RandomHorizontalFlip(),
            tfs.ToTensor(),
            tfs.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5]),
            tfs.Lambda(noise)
        ])
        data = ImageFolder(opt.root, transform=trans)
    else:
        raise ValueError(f"Unknown dataset: {opt.dataset}")

    loader = DataLoader(data, batch_size=opt.batch_size, shuffle=True,
                        num_workers=opt.workers)
    return loader
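# Usage sketch: `opt` comes from an argparse parser elsewhere in the script;
# the values below are illustrative assumptions, not the script's defaults.
#
#   opt = argparse.Namespace(dataset="cifar10", root="./data", img_width=32,
#                            batch_size=64, workers=2)
#   loader = make_dataset()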
    ])
    x = im_aug(x)
    return x


def test_tf(x):
    im_aug = tfs.Compose([
        tfs.Resize(96),
        tfs.ToTensor(),
        tfs.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    x = im_aug(x)
    return x


train_set = CIFAR10('./adata', train=True, transform=train_tf, download=True)
test_set = CIFAR10('./adata', train=False, transform=test_tf, download=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=True)
train_data = DataLoader(train_set, batch_size=64, shuffle=True)

# Show the 101st image of the training set
(data, label) = train_set[101]
a = show((data + 1) / 2).resize((100, 100))  # show() converts a tensor to a PIL Image for visualization
plt.imshow(a)
plt.show()

# Show a whole batch of images at once
dataiter = iter(train_data)
images, labels = next(dataiter)  # dataiter.next() was removed in newer PyTorch
b = show(tv.utils.make_grid((images + 1)) / 2).resize((400, 100))
plt.imshow(b)
plt.show()
    # bottleneck path
    x = F.relu(self.bn1(self.conv1(inputs)), inplace=True)
    x = F.relu(self.bn2(self.conv2(x)), inplace=True)
    x = self.bn3(self.conv3(x))

    # shortcut path
    sc = self.bn0(self.conv0(inputs)) if self.shortcut else inputs
    if x.shape != sc.shape:
        print(x.shape, sc.shape)

    # merge the two paths
    assert x.shape == sc.shape, "merge failed in resBlock"
    return F.relu(x + sc, inplace=True)


if __name__ == "__main__":
    from torchvision.datasets import CIFAR10
    import torchvision.transforms as transforms

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    trainset = CIFAR10("~/dataset/cifar10", transform=transform)
    x = trainset[0][0].unsqueeze(0)
    print(x.shape)
    b = resBlock(3, 1)
    y = b(x)
    print(b)
    print(y.shape, y.max(), y.min())
test_y = test_net(test_x)
print('output shape:{}*{}*{}'.format(test_y.shape[1], test_y.shape[2], test_y.shape[3]))

# Check that the transition layer works as expected
test_net = transition(3, 12)
test_x = Variable(torch.zeros(1, 3, 96, 96))
print('input shape:{}*{}*{}'.format(test_x.shape[1], test_x.shape[2], test_x.shape[3]))
test_y = test_net(test_x)
print('output shape:{}*{}*{}'.format(test_y.shape[1], test_y.shape[2], test_y.shape[3]))

test_net = densenet(3, 10)
test_x = Variable(torch.zeros(1, 3, 96, 96))
test_y = test_net(test_x)
print('output:{}'.format(test_y.shape))

train_set = CIFAR10('../data/', train=True, transform=data_tf)
train_data = torch.utils.data.DataLoader(train_set, batch_size=16, shuffle=True)
test_set = CIFAR10('../data/', train=False, transform=data_tf)
test_data = torch.utils.data.DataLoader(test_set, batch_size=16, shuffle=False)

net = densenet(3, 10)
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

from vgg_net import utils
utils.train(net, train_data, test_data, 20, optimizer, criterion)
    T.RandomCrop(size=32, padding=4),
    T.ToTensor(),
    T.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    # T.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))  # CIFAR-100
])
test_transform = T.Compose([
    T.ToTensor(),
    T.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    # T.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))  # CIFAR-100
])

cifar10_train = CIFAR10('../cifar10', train=True, download=True, transform=train_transform)
cifar10_unlabeled = CIFAR10('../cifar10', train=True, download=True, transform=test_transform)
cifar10_test = CIFAR10('../cifar10', train=False, download=True, transform=test_transform)


##
# Loss Prediction Loss
def LossPredLoss(input, target, margin=1.0, reduction='mean'):
    assert len(input) % 2 == 0, 'the batch size is not even.'
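    # The body is truncated in this excerpt. The completion below is a sketch
    # following the public "Learning Loss for Active Learning" (LL4AL)
    # reference implementation -- an assumption, not necessarily the author's
    # exact code. Samples are paired by flipping the batch, and a margin
    # ranking loss is applied to the differences of predicted losses.
    input = (input - input.flip(0))[:len(input) // 2]
    target = (target - target.flip(0))[:len(target) // 2]
    target = target.detach()  # the ranking targets carry no gradient
    one = 2 * torch.sign(torch.clamp(target, min=0)) - 1  # +1 if target > 0, else -1
    if reduction == 'mean':
        return torch.clamp(margin - one * input, min=0).mean()
    elif reduction == 'none':
        return torch.clamp(margin - one * input, min=0)
    raise NotImplementedError(reduction)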
params = argparse.ArgumentParser()
params.add_argument('--epoch', default=20, type=int)
params.add_argument('--bs', default=32, type=int)
params.add_argument('--num_workers', default=8, type=int)
params.add_argument('--lr', default=0.001, type=float)  # lr must parse as a float, not an int
opt = params.parse_args()

DOWNLOAD_DATASET = True

train_transform = torchvision.transforms.Compose([
    transforms.Resize(256),  # transforms.Scale is deprecated; Resize is the replacement
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_dataset = CIFAR10(root='../cifar10', train=True,
                        transform=train_transform, download=DOWNLOAD_DATASET)
test_dataset = CIFAR10(root='../cifar10', train=False,
                       transform=transforms.ToTensor(),  # needs an instance, not the class itself
                       download=DOWNLOAD_DATASET)

train_loader = Data.DataLoader(dataset=train_dataset, batch_size=opt.bs,
                               shuffle=True, num_workers=opt.num_workers)
test_loader = Data.DataLoader(dataset=test_dataset, batch_size=opt.bs,
                              shuffle=True, num_workers=opt.num_workers)
n_classes = 10
import datetime

BATCH_SIZE = 16
EPOCHS = 25
IMAGE_DIR = './GAN/checkpoints/CIFAR10/Image/Training'
IMAGE_SIZE = 32
ITER_DISPLAY = 100
ITER_REPORT = 10
LATENT_DIM = 100
MODEL_DIR = './GAN/checkpoints/CIFAR10/Model'
OUT_CHANNEL = 3

os.makedirs(IMAGE_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

transform = Compose([ToTensor(), Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
dataset = CIFAR10(root='./datasets', train=True, transform=transform, download=True)
data_loader = DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=1)

D = Discriminator().apply(init_weights)
G = Generator().apply(init_weights)
print(D)
print(G)

optim_D = torch.optim.Adam(D.parameters(), lr=2e-4, betas=(0.5, 0.9))
optim_G = torch.optim.Adam(G.parameters(), lr=2e-4, betas=(0.5, 0.9))

st = datetime.datetime.now()
iter = 0
for epoch in range(EPOCHS):
    for i, data in enumerate(data_loader):
        iter += 1
def main():
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = False
    manual_seed(args.seed)

    train_transform = Compose([
        RandomCrop(32, padding=4),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize([0.491, 0.482, 0.447], [0.247, 0.243, 0.262]),
    ])
    ds = CIFAR10(root=args.data, train=True, download=True)
    ds_train, ds_search = train_test_split(
        ds, test_ratio=0.5, shuffle=True, random_state=args.seed,
        transform=train_transform, test_transform=train_transform)
    train_queue = DataLoader(
        ds_train, batch_size=args.batch_size,
        pin_memory=True, shuffle=True, num_workers=2)
    valid_queue = DataLoader(
        ds_search, batch_size=args.batch_size,
        pin_memory=True, shuffle=True, num_workers=2)

    set_defaults({
        'relu': {
            'inplace': False,
        },
        'bn': {
            'affine': False,
        }
    })
    model = Network(args.init_channels, args.layers, num_classes=CIFAR_CLASSES)

    criterion = nn.CrossEntropyLoss()
    optimizer_arch = Adam(
        model.arch_parameters(),
        lr=args.arch_learning_rate, betas=(0.5, 0.999),
        weight_decay=args.arch_weight_decay)
    optimizer_model = SGD(
        model.model_parameters(), args.learning_rate,
        momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = CosineLR(
        optimizer_model, float(args.epochs), min_lr=args.learning_rate_min)

    train_metrics = {
        "loss": TrainLoss(),
        "acc": Accuracy(),
    }
    eval_metrics = {
        "loss": Loss(criterion),
        "acc": Accuracy(),
    }

    learner = DARTSLearner(
        model, criterion, optimizer_arch, optimizer_model, scheduler,
        train_metrics=train_metrics, eval_metrics=eval_metrics,
        search_loader=valid_queue, grad_clip_norm=5.0, work_dir='models')

    for epoch in range(args.epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))
        print(F.softmax(model.betas_normal[2:5], dim=-1))

        # training
        train_acc, train_obj = train(learner, train_queue, epoch)
        logging.info('train_acc %f', train_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def prepare_data(self):
    # download
    CIFAR10(root=self.params.PATH_DATASET, train=True, download=True)
    CIFAR10(root=self.params.PATH_DATASET, train=False, download=True)
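# In PyTorch Lightning, prepare_data() should only download: it runs once, on a
# single process. A hypothetical setup()/train_dataloader() pair that would
# typically accompany it -- an illustrative assumption, not part of the source
# (assumes `transforms` and `DataLoader` are imported as usual):
def setup(self, stage=None):
    tf = transforms.ToTensor()
    self.ds_train = CIFAR10(root=self.params.PATH_DATASET, train=True, transform=tf)
    self.ds_test = CIFAR10(root=self.params.PATH_DATASET, train=False, transform=tf)

def train_dataloader(self):
    return DataLoader(self.ds_train, batch_size=64, shuffle=True, num_workers=2)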
    dataset = LSUN('exp/datasets/lsun', ['church_outdoor_train'], transform=transforms)
elif args.dataset == 'tower' or args.dataset == 'bedroom':
    transforms = Compose([Resize(128), CenterCrop(128), ToTensor()])
    dataset = LSUN('exp/datasets/lsun', ['{}_train'.format(args.dataset)], transform=transforms)
elif args.dataset == 'celeba':
    transforms = Compose([
        CenterCrop(140),
        Resize(64),
        ToTensor(),
    ])
    dataset = CelebA('exp/datasets/celeba', split='train', transform=transforms)
elif args.dataset == 'cifar10':
    dataset = CIFAR10('exp/datasets/cifar10', train=True, transform=ToTensor())
elif args.dataset == 'ffhq':
    dataset = FFHQ(path='exp/datasets/FFHQ', transform=ToTensor(), resolution=256)

dataloader = DataLoader(dataset, batch_size=128, drop_last=False)
get_nearest_neighbors(dataloader, args.path, args.i, args.n_samples, args.k,
                      torch.cuda.is_available())
def main():
    cifar_train = CIFAR10('.',
                          train=True,
                          transform=transforms.Compose([
                              transforms.Resize((224, 224)),
                              transforms.ToTensor()
                          ]),
                          download=True)
    cifar_test = CIFAR10('.',
                         train=False,
                         transform=transforms.Compose([
                             transforms.Resize((224, 224)),
                             transforms.ToTensor()
                         ]),
                         download=True)
    dl_train = DataLoader(cifar_train, batch_size=16)
    dl_test = DataLoader(cifar_test, batch_size=16)

    logdir = "./logdir/Adam"
    num_epochs = 10
    loaders = {'train': dl_train, 'valid': dl_test}

    model = resnet34()
    for name, param in model.named_parameters():
        param.requires_grad = True
    model.train()

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    runner = dl.SupervisedRunner()
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[logger.TensorboardLogger(),
                   AccuracyCallback(num_classes=10)],
    )

    logdir = "./logdir/AdamW"
    model.apply(init_weights)
    optimizer = AdamW(model.parameters())  # the optimizer needs the parameters to optimize
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[logger.TensorboardLogger(),
                   AccuracyCallback(num_classes=10)],
    )

    logdir = "./logdir/RAdam"
    model.apply(init_weights)
    optimizer = RAdam(model.parameters())  # likewise, pass the model parameters
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=num_epochs,
        verbose=True,
        logdir=logdir,
        callbacks=[logger.TensorboardLogger(),
                   AccuracyCallback(num_classes=10)],
    )
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split
import os

dataset = CIFAR10(root='data/', transform=ToTensor())
test_dataset = CIFAR10(root='data/', train=False, transform=ToTensor())

dataset_size = 50000
test_dataset_size = 10000
classes = dataset.classes
num_classes = 10

# img, label = dataset[0]
# plt.imshow(img.permute(1,2,0))
# print('Label (numeric):', label)
# print('Label (textual):', classes[label])
# plt.show()

torch.manual_seed(43)
val_size = 5000
train_size = len(dataset) - val_size

train_ds, val_ds = random_split(dataset, [train_size, val_size])
batch_size = 128
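# The split above leaves 45,000 training and 5,000 validation images. The
# loaders below are an assumed continuation of the listing, not part of the
# original:
train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size * 2, num_workers=2, pin_memory=True)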
# LOADING DATA
dataname = args.dataset

if dataname == "mnist":
    train_loader = T.utils.data.DataLoader(
        MNIST("data/", download=True, train=True, transform=train_transforms),
        batch_size=64)
    test_loader = T.utils.data.DataLoader(
        MNIST("data/", download=True, train=False, transform=test_transforms),
        batch_size=64)
if dataname == "cifar10":
    train_loader = T.utils.data.DataLoader(
        CIFAR10("data/cifar10", download=True, train=True, transform=train_transforms),
        batch_size=64)
    test_loader = T.utils.data.DataLoader(
        CIFAR10("data/cifar10", download=True, train=False, transform=test_transforms),
        batch_size=64)
if dataname == "minerl":
    data = get_minerl_dataset(size=args.datasize)
    # train, test = data, data[-2000:]
    train_loader = T.utils.data.DataLoader(data, batch_size=32, shuffle=True)
def run_experiment(
        run_name, out_dir='./results', seed=None, device=None,
        # Training params
        bs_train=128, bs_test=None, batches=100, epochs=100,
        early_stopping=3, checkpoints=None, lr=1e-3, reg=1e-3,
        # Model params
        filters_per_layer=[64], layers_per_block=2, pool_every=2,
        hidden_dims=[1024], model_type='cnn', **kw):
    """
    Executes a single run of a Part3 experiment with a single configuration.

    These parameters are populated by the CLI parser below.
    See the help string of each parameter for its meaning.
    """
    if not seed:
        seed = random.randint(0, 2**31)
    torch.manual_seed(seed)
    if not bs_test:
        bs_test = max([bs_train // 4, 1])
    cfg = locals()

    tf = torchvision.transforms.ToTensor()
    ds_train = CIFAR10(root=DATA_DIR, download=True, train=True, transform=tf)
    ds_test = CIFAR10(root=DATA_DIR, download=True, train=False, transform=tf)

    if not device:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Select model class
    if model_type not in MODEL_TYPES:
        raise ValueError(f"Unknown model type: {model_type}")
    model_cls = MODEL_TYPES[model_type]

    # TODO: Train
    #  - Create model, loss, optimizer and trainer based on the parameters.
    #    Use the model you've implemented previously, cross entropy loss and
    #    any optimizer that you wish.
    #  - Run training and save the FitResults in the fit_res variable.
    #  - The fit results and all the experiment parameters will then be saved
    #    for you automatically.
    fit_res = None
    # ====== YOUR CODE: ======
    dl_train = DataLoader(ds_train, bs_train, shuffle=False)
    dl_test = DataLoader(ds_test, bs_test, shuffle=False)

    channels = []
    for channel in filters_per_layer:
        channels += [channel] * layers_per_block

    x0, _ = ds_train[0]
    in_size = x0.shape
    num_classes = 10
    model = model_cls(in_size=in_size, out_classes=num_classes,
                      channels=channels, pool_every=pool_every,
                      hidden_dims=hidden_dims)
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    trainer = training.TorchTrainer(model, loss_fn, optimizer, device=device)
    kw['max_batches'] = batches
    fit_res = trainer.fit(dl_train=dl_train, dl_test=dl_test,
                          num_epochs=epochs, early_stopping=early_stopping,
                          **kw)
    # ========================
    save_experiment(run_name, out_dir, cfg, fit_res)
def get_val_dataset(size=32):
    return CIFAR10('/home/piter/CIFAR10Dataset',
                   train=False,
                   transform=Compose([
                       Resize(size),  # transforms.Scale is deprecated; Resize is the replacement
                       CenterCrop(size),
                       ToTensor(),
                       STD_NORMALIZE
                   ]),
                   download=True)
train_transform = Compose([
    RandomCrop(32, padding=4),
    RandomHorizontalFlip(),
    # CIFAR10Policy(),
    ToTensor(),
    Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    # Cutout(1, 16),
])
test_transform = Compose([
    ToTensor(),
    Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

data_home = "/Users/hrvvi/Code/study/pytorch/datasets/CIFAR10"
ds_train = CIFAR10(data_home, train=True, download=True, transform=train_transform)
ds_test = CIFAR10(data_home, train=False, download=True, transform=test_transform)
ds_train = train_test_split(ds_train, test_ratio=0.01)[1]
ds_test = train_test_split(ds_test, test_ratio=0.01)[1]
train_loader = DataLoader(ds_train, batch_size=32, shuffle=True, num_workers=2)
test_loader = DataLoader(ds_test, batch_size=32)

epochs = 200

# net = PreActResNet(28, 10)
set_defaults({
    'init': {
        'mode': 'fan_out',
        'distribution': 'untruncated_normal'
    },
    'bn': {
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10

import _utils

CIFAR10_MEAN = (0.49139968, 0.48215827, 0.44653124)
CIFAR10_STD = (0.24703233, 0.24348505, 0.26158768)

TRAIN_SET = CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
    ]),
)
VAL_SET = CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)
    ]),
)
import torch
import torch.nn as nn
import torch.nn.init as init
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.utils import save_image

from config.config import Config

# -------------------- Load the CIFAR-10 dataset --------------------
# Instantiate the config object
cfg = Config()

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Load CIFAR-10
dataset = CIFAR10(root=cfg.cifar_10_dir, transform=transforms.ToTensor(), download=True)

# Data loader
data_loader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

# Save a subset of the images
for index, data in enumerate(data_loader):
    # Get the batch of images and the batch size
    images, _ = data
    batch_size = images.size(0)
    print('#{} has {} images'.format(index, batch_size))

    # Save once every 100 batches
    if index % 100 == 0:
        # Save path
        path = '../result/gan_save_image/cifar10_batch_{:03d}.png'.format(
train_epochs = 10
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
learning_rate = 1e-3
phase = 'train'
num_heads = 3
softmax_files = [
    'outputs_softmax_resnet', 'outputs_softmax_resnext',
    'outputs_softmax_shufflenet'
]
## End of Global args ##

print("Device: {}".format(torch.cuda.get_device_name()))

train_dataset = CIFAR10(root='datasets',
                        train=False,
                        transform=get_transforms(resize_shape=64),
                        download=True)

arr_softmax_outputs = []
for sf in softmax_files:
    arr_softmax_outputs.append(
        torch.load(os.path.join('softmax_outputs', '{}.pt'.format(sf)),
                   map_location=torch.device('cpu')))

combined_output = torch.stack(tuple(arr_softmax_outputs), dim=1)
train_dataset.targets = combined_output
train_dataloader = get_dataloader(dataset=train_dataset,
                                  bs=train_batch_size,
                                  num_workers=train_dataloader_num_workers)
num_classes = len(train_dataset.classes)
def fetch_dataloaders(args):
    # preprocessing transforms
    transform = T.Compose([
        T.ToTensor(),                                            # tensor in [0,1]
        lambda x: x.mul(255).div(2**(8 - args.n_bits)).floor(),  # lower bits
        partial(preprocess, n_bits=args.n_bits)                  # to model space [-1,1]
    ])
    target_transform = (lambda y: torch.eye(args.n_cond_classes)[y]) if args.n_cond_classes else None

    if args.dataset == 'mnist':
        args.image_dims = (1, 28, 28)
        train_dataset = MNIST(args.data_path, train=True, transform=transform,
                              target_transform=target_transform)
        valid_dataset = MNIST(args.data_path, train=False, transform=transform,
                              target_transform=target_transform)
    elif args.dataset == 'cifar10':
        args.image_dims = (3, 32, 32)
        train_dataset = CIFAR10(args.data_path, train=True, transform=transform,
                                target_transform=target_transform)
        valid_dataset = CIFAR10(args.data_path, train=False, transform=transform,
                                target_transform=target_transform)
    elif args.dataset == 'colored-mnist':
        args.image_dims = (3, 28, 28)
        # NOTE -- data is quantized to 2 bits and in (N,H,W,C) format
        with open(args.data_path, 'rb') as f:  # returns dict {'train': np array; 'test': np array}
            data = pickle.load(f)
        # quantize to n_bits to match the transforms for other datasets and construct tensors in shape N,C,H,W
        train_data = torch.from_numpy(
            np.floor(data['train'].astype(np.float32) / (2**(2 - args.n_bits)))).permute(0, 3, 1, 2)
        valid_data = torch.from_numpy(
            np.floor(data['test'].astype(np.float32) / (2**(2 - args.n_bits)))).permute(0, 3, 1, 2)
        # preprocess to [-1,1] and set up datasets -- NOTE using 0s for labels to have a symmetric dataloader
        train_dataset = TensorDataset(preprocess(train_data, args.n_bits),
                                      torch.zeros(train_data.shape[0]))
        valid_dataset = TensorDataset(preprocess(valid_data, args.n_bits),
                                      torch.zeros(valid_data.shape[0]))
    else:
        raise RuntimeError('Dataset not recognized')

    if args.mini_data:  # reduce the dataset to a single batch
        if args.dataset == 'colored-mnist':
            train_dataset = train_dataset.tensors[0][:args.batch_size]
        else:
            train_dataset.data = train_dataset.data[:args.batch_size]
            train_dataset.targets = train_dataset.targets[:args.batch_size]
        valid_dataset = train_dataset

    print('Dataset {}\n\ttrain len: {}\n\tvalid len: {}\n\tshape: {}\n\troot: {}'.format(
        args.dataset, len(train_dataset), len(valid_dataset),
        train_dataset[0][0].shape, args.data_path))

    train_dataloader = DataLoader(train_dataset, args.batch_size, shuffle=True,
                                  pin_memory=(args.device.type == 'cuda'), num_workers=4)
    valid_dataloader = DataLoader(valid_dataset, args.batch_size, shuffle=False,
                                  pin_memory=(args.device.type == 'cuda'), num_workers=4)

    # save a sample
    data_sample = next(iter(train_dataloader))[0]
    writer.add_image('data_sample',
                     make_grid(data_sample, normalize=True, scale_each=True),
                     args.step)
    save_image(data_sample, os.path.join(args.output_dir, 'data_sample.png'),
               normalize=True, scale_each=True)

    return train_dataloader, valid_dataloader
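# `preprocess` is defined elsewhere in the source. Given how it is used above
# (integer codes in [0, 2**n_bits - 1] mapped "to model space [-1,1]"), a
# plausible sketch is the following -- an assumption, not the author's
# definition:
#
#   def preprocess(x, n_bits):
#       return x.div(2**n_bits - 1).mul(2).sub(1)  # [0, 2^n - 1] -> [-1, 1]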
def cifar10(args, dataset_paths):
    """ Loads the CIFAR-10 dataset.
        Returns: train/valid/test set split dataloaders. """
    transf = {
        'train': transforms.Compose([
            transforms.RandomHorizontalFlip(0.5),
            transforms.RandomCrop((args.crop_dim, args.crop_dim), padding=args.padding),
            transforms.ToTensor(),
            # Standardize()]),
            transforms.Normalize((0.49139968, 0.48215841, 0.44653091),
                                 (0.24703223, 0.24348513, 0.26158784))]),
        'test': transforms.Compose([
            transforms.ToTensor(),
            # Standardize()])}
            transforms.Normalize((0.49139968, 0.48215841, 0.44653091),
                                 (0.24703223, 0.24348513, 0.26158784))])
    }

    config = {'train': True, 'test': False}
    datasets = {i: CIFAR10(root=dataset_paths[i], transform=transf[i],
                           train=config[i], download=True)
                for i in config.keys()}

    # weighted sampler weights for full(f) training set
    f_s_weights = sample_weights(datasets['train'].targets)

    # return data, labels dicts for new train set and class-balanced valid set
    data, labels = random_split(data=datasets['train'].data,
                                labels=datasets['train'].targets,
                                n_classes=10,
                                n_samples_per_class=np.repeat(500, 10).reshape(-1))

    # define transforms for train set (without valid data)
    transf['train_'] = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomCrop((args.crop_dim, args.crop_dim), padding=args.padding),
        transforms.ToTensor(),
        # Standardize()])
        transforms.Normalize((0.49139968, 0.48215841, 0.44653091),
                             (0.24703223, 0.24348513, 0.26158784))])

    # define transforms for class-balanced valid set
    transf['valid'] = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        # Standardize()])
        transforms.Normalize((0.49139968, 0.48215841, 0.44653091),
                             (0.24703223, 0.24348513, 0.26158784))])

    # save original full training set
    datasets['train_valid'] = datasets['train']

    # make new training set without validation samples
    datasets['train'] = CustomDataset(data=data['train'],
                                      labels=labels['train'],
                                      transform=transf['train_'])

    # make class balanced validation set
    datasets['valid'] = CustomDataset(data=data['valid'],
                                      labels=labels['valid'],
                                      transform=transf['valid'])

    # weighted sampler weights for new training set
    s_weights = sample_weights(datasets['train'].labels)

    config = {
        'train': WeightedRandomSampler(s_weights, num_samples=len(s_weights), replacement=True),
        'train_valid': WeightedRandomSampler(f_s_weights, num_samples=len(f_s_weights), replacement=True),
        'valid': None,
        'test': None
    }

    if args.distributed:
        config = {'train': DistributedSampler(datasets['train']),
                  'train_valid': DistributedSampler(datasets['train_valid']),
                  'valid': None,
                  'test': None}

    dataloaders = {i: DataLoader(datasets[i], sampler=config[i],
                                 num_workers=8, pin_memory=True, drop_last=True,
                                 batch_size=args.batch_size)
                   for i in config.keys()}

    return dataloaders
__author__ = 'Venushka Thisara'

import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Transformations
NRM = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
TT = transforms.ToTensor()

# Transforms object for testset with NO augmentation
transform_no_aug = transforms.Compose([TT, NRM])

# Downloading/Loading CIFAR10 data
trainset = CIFAR10(root='./data', train=True, download=True)   # , transform=transform_with_aug)
testset = CIFAR10(root='./data', train=False, download=True)   # , transform=transform_no_aug)

classDict = {
    'plane': 0, 'car': 1, 'bird': 2, 'cat': 3, 'deer': 4,
    'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9
}
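# Usage sketch (an assumption, not from the source): classDict makes it easy to
# slice one class out of the raw numpy data, e.g. every 'cat' training image.
cat_indices = np.where(np.array(trainset.targets) == classDict['cat'])[0]
cat_images = trainset.data[cat_indices]  # uint8 array of shape (5000, 32, 32, 3)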
from torchvision.datasets import CIFAR10
from torchvision.transforms.functional import to_tensor
from torchvision.utils import save_image

ds_path = "/mnt/hdd_1tb/datasets/cifar10"
tv_ds = CIFAR10(ds_path)

image_rgb = tv_ds[0][0]
image_rgb_tensor = to_tensor(image_rgb)

image_ycbcr = image_rgb.convert("YCbCr")
image_ycbcr_tensor = to_tensor(image_ycbcr)
save_image(image_ycbcr_tensor, "image_ycbcr_tensor.png")

for channel in image_ycbcr_tensor:
    print(f"{channel.mean()=}, {channel.std()=}, {channel.min()=}, {channel.max()=}")
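# An assumed extension (not in the source): the same per-channel statistics can
# be accumulated over the whole training set to obtain YCbCr normalization
# constants, analogous to the usual RGB mean/std. Stacking all 50k images takes
# roughly 600 MB of RAM; a running sum would be lighter.
import torch

stats = torch.stack([to_tensor(img.convert("YCbCr")) for img, _ in tv_ds])
print("mean:", stats.mean(dim=(0, 2, 3)), "std:", stats.std(dim=(0, 2, 3)))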
'''

import torch
import torchvision
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms

torch.manual_seed(42)

# Download training/test dataset
# CIFAR10 dataset (images and labels) -> CONVERTED TO GRAYSCALE
trans = transforms.Compose(
    [transforms.Grayscale(num_output_channels=1),
     transforms.ToTensor()])
dataset = CIFAR10(root='data/', train=True, transform=trans)
test_dataset = CIFAR10(root='data/', train=False, transform=trans)  # the test split needs the same grayscale transform

'''
import matplotlib.pyplot as plt
image, label = dataset[0]
plt.imshow(image[0,:,:], cmap='gray')
plt.show()
print('Label:', label)
'''

'''
# Check a sample image's dimensions
img_tensor, label = dataset[0]
print(img_tensor.shape, label)  # 1x32x32 tensor (1st dim is the color channel -> grayscale -> originally 3x32x32)
'''
def main(args):
    # Model getter: specify dataset and depth of the network.
    model = pytorchcv_wrapper.resnet("cifar10", depth=20, pretrained=False)

    # Or get a more specific model. E.g. wide resnet, with depth 40 and growth
    # factor 8 for Cifar 10.
    # model = pytorchcv_wrapper.get_model("wrn40_8_cifar10", pretrained=False)

    # --- CONFIG
    device = torch.device(f"cuda:{args.cuda}"
                          if torch.cuda.is_available() and args.cuda >= 0
                          else "cpu")
    device = "cpu"  # forces CPU, overriding the CUDA device selected above

    # --- TRANSFORMATIONS
    transform = transforms.Compose([
        ToTensor(),
        transforms.Normalize((0.491, 0.482, 0.446), (0.247, 0.243, 0.261)),
    ])

    # --- SCENARIO CREATION
    cifar_train = CIFAR10(
        root=expanduser("~") + "/.avalanche/data/cifar10/",
        train=True,
        download=True,
        transform=transform,
    )
    cifar_test = CIFAR10(
        root=expanduser("~") + "/.avalanche/data/cifar10/",
        train=False,
        download=True,
        transform=transform,
    )
    scenario = nc_benchmark(
        cifar_train,
        cifar_test,
        5,
        task_labels=False,
        seed=1234,
        fixed_class_order=[i for i in range(10)],
    )

    # choose some metrics and evaluation method
    interactive_logger = InteractiveLogger()

    eval_plugin = EvaluationPlugin(
        accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True),
        loss_metrics(minibatch=True, epoch=True, experience=True, stream=True),
        forgetting_metrics(experience=True),
        loggers=[interactive_logger],
    )

    # CREATE THE STRATEGY INSTANCE (Naive, with Replay)
    cl_strategy = Naive(
        model,
        torch.optim.SGD(model.parameters(), lr=0.01),
        CrossEntropyLoss(),
        train_mb_size=100,
        train_epochs=1,
        eval_mb_size=100,
        device=device,
        plugins=[ReplayPlugin(mem_size=1000)],
        evaluator=eval_plugin,
    )

    # TRAINING LOOP
    print("Starting experiment...")
    results = []
    for experience in scenario.train_stream:
        print("Start of experience ", experience.current_experience)
        cl_strategy.train(experience)
        print("Training completed")

        print("Computing accuracy on the whole test set")
        results.append(cl_strategy.eval(scenario.test_stream))
def example(rank, world_size, nodeid, cmdlineArgs, train_dataset, test_dataset):
    commType = cmdlineArgs.comm_backend
    processes_per_node = cmdlineArgs.processes_per_node
    localrank = rank
    del rank
    globalrank = nodeid * processes_per_node + localrank
    if globalrank == 0:
        tstartexample = time.time()
    dist.init_process_group(commType, rank=globalrank, world_size=world_size)

    # Training settings
    device = torch.device(cmdlineArgs.device)

    if cmdlineArgs.scaling_type == "strong":
        # strong scaling: keep total batchsize constant by cutting down the amount of work per GPU
        # weak scaling: keep work done per GPU constant so that total batchsize grows as we add more GPUs
        cmdlineArgs.batch_size = cmdlineArgs.batch_size // world_size

    print("Global Rank", globalrank, "World size", world_size,
          "Batch size", cmdlineArgs.batch_size)

    if not cmdlineArgs.random_data:
        print("using cifar10 dataset")
        augmentations = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
        ]
        normalize = [
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ]
        train_transform = transforms.Compose(augmentations + normalize)
        test_transform = transforms.Compose(normalize)

        train_dataset = CIFAR10(root=cmdlineArgs.data_root, train=True,
                                download=True, transform=train_transform)
        test_dataset = CIFAR10(root=cmdlineArgs.data_root, train=False,
                               download=True, transform=test_transform)
        nlabels = 10
    else:
        print("using randomly generated data")
        nlabels = cmdlineArgs.random_nlabels

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset, num_replicas=world_size, rank=globalrank)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=cmdlineArgs.batch_size,
        shuffle=False,  # set shuffle to false because the DistributedSampler already shuffles by default
        sampler=train_sampler,
        drop_last=True,
        num_workers=cmdlineArgs.workers,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=cmdlineArgs.batch_size,
        shuffle=False,
        num_workers=cmdlineArgs.workers,
    )

    run_results = []
    for _ in range(cmdlineArgs.n_runs):
        print("Trying to make model on globalrank", globalrank)
        Model = getattr(torchvision.models, cmdlineArgs.model)
        model = DDP(Model(num_classes=nlabels).to(localrank),
                    device_ids=[localrank],
                    bucket_cap_mb=cmdlineArgs.bucket_cap)
        print(f"model successfully built on globalrank {globalrank}")

        optimizer = optim.SGD(model.parameters(), lr=cmdlineArgs.lr, momentum=0)

        if globalrank == 0:
            av_time_per_epoch = 0
        for epoch in range(1, cmdlineArgs.epochs + 1):
            train_sampler.set_epoch(epoch)
            if (globalrank == 0) and (epoch > 1):
                t_start_epoch = time.time()
            av_losses = train(model, localrank, train_loader, optimizer, epoch)
            if (globalrank == 0) and (epoch > 1):
                t_end_epoch = time.time()
                av_time_per_epoch += t_end_epoch - t_start_epoch

        if (globalrank == 0) and (cmdlineArgs.epochs > 1):
            if cmdlineArgs.write_scaling_results:
                nodename = os.environ.get('MASTER_ADDR')
                if cmdlineArgs.random_data:
                    dataset = "random"
                else:
                    dataset = "cifar"
                fname = (cmdlineArgs.results_root + dataset + "-" + nodename
                         + "-" + cmdlineArgs.scaling_type + "-" + str(world_size)
                         + "-" + str(cmdlineArgs.batch_size))
                fname += '-' + str(datetime.datetime.now()) + ".pyout"
                outputfile = open(fname, "w+")
                outputfile.write(f"node name:{nodename}\n")
                outputfile.write(
                    f"batch-size: {cmdlineArgs.batch_size}\n"
                    f"epochs: {cmdlineArgs.epochs}\n"
                    f"processes per node: {cmdlineArgs.processes_per_node}\n")
                outputfile.write(
                    f"comm backend: {cmdlineArgs.comm_backend}\n"
                    f"scaling type: {cmdlineArgs.scaling_type}\n"
                    f"bucket-cap: {cmdlineArgs.bucket_cap}\n")
                outputfile.write(f"model = {cmdlineArgs.model}\n")
                outputfile.write(f"number of samples in train set: {len(train_dataset)}\n")
                outputfile.write(f"num GPUs: {world_size}\n")
                outputfile.write("\n\n\nresults\n\n\n")
                if cmdlineArgs.random_data:
                    outputfile.write(
                        f"dimension of (square) input image: {cmdlineArgs.random_data_dim}\n")
                    outputfile.write(
                        f"num labels of random data: {cmdlineArgs.random_nlabels}\n")

            av_time_per_epoch /= (cmdlineArgs.epochs - 1)
            print(f"\n\n\nav_time_per_epoch={av_time_per_epoch}")
            print(f"number of samples in train set: {len(train_dataset)}")
            print(f"num GPUs: {world_size}")
            print(f"Av samples processed per second per GPU: "
                  f"{(len(train_dataset)/av_time_per_epoch)/world_size}")
            tendexample = time.time()
            print(f"total running time of example() on globalrank 0: "
                  f"{tendexample-tstartexample}")
            print("\n\n\n")

            if cmdlineArgs.write_scaling_results:
                outputfile.write(f"av_time_per_epoch={av_time_per_epoch}\n")
                outputfile.write(f"final av loss = {av_losses}\n")
                outputfile.write(
                    f"Av samples processed per second per GPU: "
                    f"{(len(train_dataset)/av_time_per_epoch)/world_size}\n")
                outputfile.write(
                    f"total running time of example() on globalrank 0: "
                    f"{tendexample-tstartexample}\n")
                outputfile.close()

        run_results.append(test(model, localrank, test_loader))

    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))

    repro_str = (f"resnet_{cmdlineArgs.lr}_"
                 f"{cmdlineArgs.batch_size}_{cmdlineArgs.epochs}")
    if cmdlineArgs.saveModelResults:
        torch.save(run_results, f"run_results_{repro_str}.pt")

    if cmdlineArgs.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
def __init__(self, args, cuda):
    kwargs = {'num_workers': 1, 'pin_memory': False} if cuda else {}

    # Default dataloader class
    dataloader_class = DataLoader

    if args.dataset_name == 'static_mnist':
        data_folder = './data/static_bin_mnist/'
        train_set = StaticBinaryMnist(data_folder, train=True,
                                      download=True, shuffle_init=True)
        test_set = StaticBinaryMnist(data_folder, train=False,
                                     download=True, shuffle_init=True)

    elif args.dataset_name == 'cifar10':
        # Discrete values 0, 1/255, ..., 254/255, 1
        transform = transforms.Compose([
            # Move values to the center of 256 bins
            # transforms.Lambda(lambda x: Image.eval(
            #     x, lambda y: y * (255/256) + 1/512)),
            transforms.ToTensor(),
        ])
        data_folder = './data/cifar10/'
        train_set = CIFAR10(data_folder, train=True,
                            download=True, transform=transform)
        test_set = CIFAR10(data_folder, train=False,
                           download=True, transform=transform)

    elif args.dataset_name == 'svhn':
        transform = transforms.ToTensor()
        data_folder = './data/svhn/'
        train_set = SVHN(data_folder, split='train',
                         download=True, transform=transform)
        test_set = SVHN(data_folder, split='test',
                        download=True, transform=transform)

    elif args.dataset_name == 'celeba':
        transform = transforms.Compose([
            transforms.CenterCrop(148),
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
        ])
        data_folder = '/scratch/adit/data/celeba/'
        train_set = CelebA(data_folder, split='train',
                           download=True, transform=transform)
        test_set = CelebA(data_folder, split='valid',
                          download=True, transform=transform)

    elif args.dataset_name in multiobject_datasets:
        data_path = multiobject_paths[args.dataset_name]
        train_set = MultiObjectDataset(data_path, train=True)
        test_set = MultiObjectDataset(data_path, train=False)

        # Custom data loader class
        dataloader_class = MultiObjectDataLoader

    else:
        raise RuntimeError("Unrecognized data set '{}'".format(args.dataset_name))

    self.train = dataloader_class(train_set,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  drop_last=True,
                                  **kwargs)
    self.test = dataloader_class(test_set,
                                 batch_size=args.test_batch_size,
                                 shuffle=False,
                                 **kwargs)

    self.data_shape = self.train.dataset[0][0].size()
    self.img_size = self.data_shape[1:]
    self.color_ch = self.data_shape[0]
def get_train_dataset(size=32):
    return CIFAR10('/home/piter/CIFAR10Dataset',
                   train=True,
                   transform=Compose([
                       Resize(size),  # transforms.Scale is deprecated; Resize is the replacement
                       RandomSizedCrop2(size, min_area=0.5),
                       RandomHorizontalFlip(),
                       ToTensor(),
                       STD_NORMALIZE
                   ]),
                   download=True)
def forward(self, input):
    output = self.net(input)
    output = output.view(-1, 256)
    output = self.fc(output)
    return output


train_transformations = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_set = CIFAR10(root="./data",
                    train=True,
                    transform=train_transformations,
                    download=True)

batch_size = 32
train_loader = DataLoader(train_set,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=4)

test_transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

test_set = CIFAR10("./data",
                   train=False,
        '\nTest set ({:d} samples): Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(
            len(test_data), test_loss, 100 * acc))
    return test_loss / len(test_data), 100 * acc


if __name__ == '__main__':
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    batch_size = 100
    epochs = 50

    transform = tf.Compose([
        tf.ToTensor(),
        tf.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    ])
    dataset_train = CIFAR10(root=r'C:\MyFiles\DLLabs\Lab1\Dataset\train',
                            train=True,
                            transform=transform,
                            download=True)
    train_data = DataLoader(dataset=dataset_train,
                            batch_size=batch_size,
                            shuffle=True,
                            num_workers=2)
    dataset_test = CIFAR10(root=r'C:\MyFiles\DLLabs\Lab1\Dataset\test',
                           train=False,
                           transform=transform,
                           download=True)
    test_data = DataLoader(dataset=dataset_test,
                           batch_size=batch_size,
                           shuffle=False,
                           num_workers=2)

    CNN = CNN().to(device)