def get_ardis_dataset():
    # Load the ARDIS data from the CSV files.
    ardis_images = np.loadtxt('./data/ARDIS/ARDIS_train_2828.csv', dtype='float')
    ardis_labels = np.loadtxt('./data/ARDIS/ARDIS_train_labels.csv', dtype='float')

    # Reshape to [samples][width][height].
    ardis_images = ardis_images.reshape(ardis_images.shape[0], 28, 28).astype('float32')

    # Labels are one-hot encoded; keep only the digit-7 samples.
    indices_seven = np.where(ardis_labels[:, 7] == 1)[0]
    images_seven = ardis_images[indices_seven, :]
    images_seven = torch.tensor(images_seven).type(torch.uint8)
    # One label (digit 7) per selected image.
    labels_seven = torch.tensor([7] * len(indices_seven))

    ardis_dataset = EMNIST('./data', split="digits", train=True, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ]))
    ardis_dataset.data = images_seven
    ardis_dataset.targets = labels_seven
    return ardis_dataset
def get_single_task(dataroot, task):
    tf = transforms.ToTensor()
    if task.startswith('EMNIST'):
        split = task.split('/', maxsplit=2)[1]
        dataroot = join(dataroot, 'emnist')
        tf_target = (lambda x: x - 1) if split == 'letters' else None
        output_size = 26 if split == 'letters' else 10
        trainset = EMNIST(dataroot, split=split, train=True, transform=tf,
                          target_transform=tf_target)
        trainset = stratified_subset(trainset, trainset.targets.tolist(), 500)
        testset = EMNIST(dataroot, split=split, train=False, transform=tf,
                         target_transform=tf_target)
    elif task == 'KMNIST':
        dataroot = join(dataroot, 'kmnist')
        output_size = 10
        trainset = KMNIST(dataroot, train=True, transform=tf)
        trainset = stratified_subset(trainset, trainset.targets.tolist(), 500)
        testset = KMNIST(dataroot, train=False, transform=tf)
    else:
        raise ValueError(task)
    return trainset, testset, output_size
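# stratified_subset is not defined in this snippet. A minimal sketch of a helper that
# matches the call signature above, assuming the third argument is the number of
# examples kept per class (the real helper may behave differently):
from collections import defaultdict
import random
from torch.utils.data import Subset

def stratified_subset(dataset, labels, n_per_class, seed=0):
    """Keep at most n_per_class examples of every label (hypothetical helper)."""
    rng = random.Random(seed)
    by_class = defaultdict(list)
    for idx, label in enumerate(labels):
        by_class[label].append(idx)
    keep = []
    for label_indices in by_class.values():
        rng.shuffle(label_indices)
        keep.extend(label_indices[:n_per_class])
    return Subset(dataset, sorted(keep))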
def emnist_flat(split, batch_size, dev):
    kwargs = {} if dev == torch.device('cpu') \
        else {'num_workers': 4, 'pin_memory': True}
    train = EMNIST(EMNIST_ROOT, split, train=True, download=True,
                   transform=flat_transform)
    test = EMNIST(EMNIST_ROOT, split, train=False, download=True,
                  transform=flat_transform)
    train_loader = DataLoader(train, batch_size=batch_size, shuffle=True, **kwargs)
    test_loader = DataLoader(test, batch_size=batch_size, **kwargs)
    return train_loader, test_loader
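# EMNIST_ROOT and flat_transform are defined elsewhere in this code base. A plausible
# flat_transform, assuming it simply flattens each 1x28x28 image into a 784-vector,
# could look like this (an assumption, not the original definition):
from torchvision import transforms

flat_transform = transforms.Compose([
    transforms.ToTensor(),                       # PIL image -> 1x28x28 float tensor
    transforms.Lambda(lambda x: x.view(-1)),     # flatten to a 784-dimensional vector
])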
def main(args):
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))

    # Create the dataset.
    transform = transforms.Compose([transforms.ToTensor()])
    if args.mode == 'train':
        train_dataset = EMNIST(root='./data', split='mnist', train=True,
                               download=True, transform=transform)
        # train_dataset, _ = train_test_split(train_dataset, train_size=int(0.1*len(train_dataset)))
        train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    if args.mode != 'custom_mnist':
        test_dataset = EMNIST(root='./data', split='mnist', train=False,
                              download=True, transform=transform)
    if args.mode == 'custom_mnist':
        custom_transform = transforms.Compose([
            lambda x: np.asarray(x),
            lambda x: cv2.cvtColor(255 - x, cv2.COLOR_RGB2GRAY),
            lambda x: cv2.GaussianBlur(x, (3, 3), 1),
            lambda x: get_roi(x),
            transforms.ToTensor(),
            transforms.Resize((28, 28)),
            lambda x: torch.transpose(x, 1, 2)
        ])
        test_dataset = ImageFolder('./data/CustomMNIST', transform=custom_transform)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True)
    n_classes = len(test_dataset.classes)

    # Create the model, loss function and optimizer.
    model = MNISTClassifier(n_classes).to(device)
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # Train and test.
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer, train_dataloader, test_dataloader, args)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    if args.mode != 'train':
        visualization_dataset64, _ = train_test_split(test_dataset, train_size=64)
        visualize_predictions(model, visualization_dataset64, device)
    return test(model, loss_fcn, device, test_dataloader)
def get_datasets(split='balanced', save=False):
    download_folder = './data'
    transform = Compose([ToTensor()])
    dataset = ConcatDataset([
        EMNIST(root=download_folder, split=split, download=True, train=False, transform=transform),
        EMNIST(root=download_folder, split=split, download=True, train=True, transform=transform)
    ])

    if save:
        # Regenerate the train/eval split indices and save them to disk.
        random_seed = 4211  # do not change
        n_samples = len(dataset)
        eval_size = 0.2
        indices = list(range(n_samples))
        split_point = int(np.floor(eval_size * n_samples))
        np.random.seed(random_seed)
        np.random.shuffle(indices)
        train_indices, eval_indices = indices[split_point:], indices[:split_point]
        # Keep only half of each split.
        train_indices = train_indices[:len(train_indices) // 2]
        eval_indices = eval_indices[:len(eval_indices) // 2]
        np.savez('train_test_split.npz', train=train_indices, test=eval_indices)
    else:
        # Load the saved train/eval split indices (students should use save=False).
        with np.load('./train_test_split.npz') as f:
            train_indices = f['train']
            eval_indices = f['test']

    train_dataset = Subset(dataset, indices=train_indices)
    eval_dataset = Subset(dataset, indices=eval_indices)
    return train_dataset, eval_dataset
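# Illustrative usage sketch, assuming train_test_split.npz already exists next to the
# script; batch sizes are placeholders.
from torch.utils.data import DataLoader

train_dataset, eval_dataset = get_datasets(split='balanced', save=False)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=128, shuffle=False)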
def letters(root):
    from torchvision.datasets import EMNIST

    transform = transforms.Compose([
        lambda x: x.convert("RGB"),
        transforms.Resize(224),
        transforms.ToTensor(),
        # transforms.Normalize((0.5, 0.5, 0.5), (1., 1., 1.)),
    ])
    trainset = EMNIST(root, train=True, split='letters', transform=transform, download=True)
    testset = EMNIST(root, train=False, split='letters', transform=transform)
    return trainset, testset
def get_dataloaders_using_pytorch():
    # Dataset download location.
    root = '../data/emnist_pthdata'

    # Transforms for train and test.
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(degrees=10, translate=(0.2, 0.2), scale=(0.8, 1.2)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])

    train_dataset = EMNIST(root=root, split='balanced', download=True, train=True,
                           transform=train_transform)
    test_dataset = EMNIST(root=root, split='balanced', download=True, train=False,
                          transform=test_transform)

    train_loader = DataLoader(dataset=train_dataset, batch_size=192, drop_last=True,
                              sampler=ImbalancedDatasetSampler(train_dataset))
    test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=64)
    return train_loader, test_loader
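# ImbalancedDatasetSampler comes from an external package (e.g. torchsampler) and is not
# defined here. If it is unavailable, a class-balanced sampler can be approximated with
# PyTorch's built-in WeightedRandomSampler; a minimal sketch (not the original code):
import torch
from torch.utils.data import WeightedRandomSampler

def make_balanced_sampler(dataset):
    # Weight every sample by the inverse frequency of its class label.
    targets = dataset.targets
    class_counts = torch.bincount(targets)
    weights = 1.0 / class_counts[targets].float()
    return WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)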
def get_emnist_data(transform=None, RGB=True):
    """Returns EMNIST train and test datasets.

    This function is assumed to be primarily used as seed data.
    DataLoaders and data splits are in synthesis.py

    Parameters:
        transform: Relevant Torchvision transforms to apply to EMNIST
        RGB: A boolean value that decides if the images are RGB

    Returns:
        Two Torchvision datasets with the EMNIST train and test sets
    """
    if transform is None and RGB:
        transform = transforms.Compose([
            transforms.Lambda(lambda image: image.convert("RGB")),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
    elif transform is None and not RGB:
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])

    emnist_train = EMNIST(os.getcwd(), split="digits", train=True, download=True,
                          transform=transform)
    emnist_test = EMNIST(os.getcwd(), split="digits", train=False, download=True,
                         transform=transform)
    return emnist_train, emnist_test
def get_test_loader(self, filename=None):
    dataset = EMNIST(os.path.join(datasets_path, 'emnist'), split='balanced',
                     download=True, train=False)

    def transform(x):
        return torch.bernoulli(x.unsqueeze(-3).float().div(255))

    filename = self.testfile if filename is None else filename
    classes = torch.arange(20, 47) if self.novel else None
    return get_saved_data_loader(dataset, filename, TEST_NUM_PER_CLASS,
                                 transform=transform, classes=classes)
def __build_truncated_dataset__(self):
    emnist_dataobj = EMNIST(self.root, split="digits", train=self.train,
                            transform=self.transform,
                            target_transform=self.target_transform,
                            download=self.download)
    if self.train:
        data = emnist_dataobj.train_data
        target = emnist_dataobj.train_labels
    else:
        data = emnist_dataobj.test_data
        target = emnist_dataobj.test_labels

    if self.dataidxs is not None:
        data = data[self.dataidxs]
        target = target[self.dataidxs]

    return data, target
def get_train_loader(self):
    dataset = EMNIST(os.path.join(datasets_path, 'emnist'), split='balanced',
                     download=True, train=True)

    def transform(x):
        return torch.bernoulli(x.unsqueeze(-3).float().div(255))

    classes = torch.arange(20) if self.novel else None
    return get_random_data_loader(dataset, self.B, self.N, self.K, self.num_steps,
                                  TRAIN_NUM_PER_CLASS, transform=transform,
                                  classes=classes)
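# For reference, the transform used by both loaders stochastically binarizes the raw
# uint8 EMNIST images. A standalone sketch of its effect on a single (fake) image:
import torch

raw = torch.randint(0, 256, (28, 28), dtype=torch.uint8)   # stand-in for dataset.data[i]
probs = raw.unsqueeze(-3).float().div(255)                  # 1x28x28 pixel intensities in [0, 1]
binary = torch.bernoulli(probs)                              # each pixel sampled to 0.0 or 1.0
assert binary.shape == (1, 28, 28)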
def load_or_generate_data(self) -> None:
    """Fetch the EMNIST dataset."""
    dataset = EMNIST(
        root=DATA_DIRNAME,
        split="byclass",
        train=self.train,
        download=False,
        transform=None,
        target_transform=None,
    )
    self._data = dataset.data
    self._targets = dataset.targets
    if self.sample_to_balance:
        self._sample_to_balance()
    if self.subsample_fraction is not None:
        self._subsample()
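# _sample_to_balance and _subsample are methods of the surrounding class that are not
# shown here. A hypothetical _sample_to_balance that trims every class to the size of
# the smallest one might look like this (the name match is the only thing taken from
# the snippet; the behaviour is an assumption):
import torch

def _sample_to_balance(self):
    counts = torch.bincount(self._targets)
    n_keep = int(counts[counts > 0].min())
    keep_indices = []
    for label in torch.unique(self._targets):
        label_indices = torch.nonzero(self._targets == label, as_tuple=False).flatten()
        keep_indices.append(label_indices[:n_keep])
    keep_indices = torch.cat(keep_indices)
    self._data = self._data[keep_indices]
    self._targets = self._targets[keep_indices]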
def load_dataset(data_transforms, batch_size=1024, num_workers=0, root=DATA_ROOT, split='digits'):
    datasets = {}
    for name in ('train', 'valid'):
        is_training = name == 'train'
        dataset = EMNIST(root=root, split=split, train=is_training,
                         download=True, transform=data_transforms[name])
        loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers)
        datasets[name] = {'dataset': dataset, 'loader': loader}
    return datasets
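# load_dataset expects data_transforms to be a dict with 'train' and 'valid' keys.
# An illustrative call; the specific transforms here are placeholders, not the ones
# used by the original project.
from torchvision import transforms

data_transforms = {
    'train': transforms.Compose([transforms.ToTensor()]),
    'valid': transforms.Compose([transforms.ToTensor()]),
}
datasets = load_dataset(data_transforms, batch_size=1024, split='digits')
train_loader = datasets['train']['loader']
valid_loader = datasets['valid']['loader']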
def __init__(self, letters, n_cap):
    """
    Create a data object to house aspects of the data.

    :param letters: a tuple of the English letters to plot
    :param n_cap: cap on the number of observations kept
    """
    self.letters = letters

    # Required PyTorch transform for this project.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Fetch the dataset.
    self.emnist = EMNIST(root="./data", split='bymerge', download=True)

    # Split.
    self.data_ind, self.targ, self.flat_X, self.labels, self.X = self.filter_by_label(n_cap)
def __build_truncated_dataset__(self):
    emnist_dataobj = EMNIST(self.root, split="digits", train=self.train,
                            transform=self.transform,
                            target_transform=self.target_transform,
                            download=self.download)
    # The train and test branches read the same attributes, so no split is needed here.
    data = emnist_dataobj.data
    target = np.array(emnist_dataobj.targets)

    if self.dataidxs is not None:
        data = data[self.dataidxs]
        target = target[self.dataidxs]

    # Append the saved ARDIS images and labels to the EMNIST digits.
    data = np.append(data, self.saved_ardis_dataset_train, axis=0)
    target = np.append(target, self.saved_ardis_label_train, axis=0)

    return data, target
def sample(self, B, N, K, **kwargs):
    dataset = EMNIST(os.path.join(datasets_path, 'emnist'), split='balanced',
                     download=True, train=False)

    def transform(x):
        return torch.bernoulli(x.unsqueeze(-3).float().div(255))

    loader = get_random_data_loader(
        dataset, B, N, K, 1, TEST_NUM_PER_CLASS,
        transform=transform,
        classes=(torch.arange(20, 47) if self.novel else None),
        **kwargs)
    return next(iter(loader))
def main(): # Training settings parser = argparse.ArgumentParser( description='run approximation to LeNet on Mnist') parser.add_argument('--batch-size', type=int, default=512, metavar='N', help='input batch size for training (default: 64)') parser.add_argument('--test-batch-size', type=int, default=64, metavar='N', help='input batch size for testing (default: 1000)') parser.add_argument('--approx-epochs', type=int, default=200, metavar='N', help='number of epochs to approx (default: 10)') parser.add_argument('--lr', type=float, default=1e-2, metavar='LR', help='learning rate (default: 0.0005)') parser.add_argument('--momentum', type=float, default=0.5, metavar='M', help='SGD momentum (default: 0.5)') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument( '--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') parser.add_argument('--dropout-rate', type=float, default=0.5, metavar='p_drop', help='dropout rate') parser.add_argument( '--S', type=int, default=100, metavar='N', help='number of posterior samples from the Bayesian model') parser.add_argument( '--model-path', type=str, default='../saved_models/emnist_mcdp/', help='number of posterior samples from the Bayesian model') parser.add_argument('--save-approx-model', type=int, default=0, metavar='N', help='save approx model or not? default not') parser.add_argument('--from-approx-model', type=int, default=1, metavar='N', help='if our model is loaded or trained') parser.add_argument('--test-ood-from-disk', type=int, default=1, help='generate test samples or load from disk') args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) device = torch.device("cuda" if use_cuda else "cpu") kwargs = {'num_workers': 8, 'pin_memory': False} if use_cuda else {} tr_data = EMNIST( '../../data', split='balanced', train=True, transform=transforms.Compose([ # transforms.ToPILImage(), transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )) ]), download=True) te_data = EMNIST( '../../data', split='balanced', train=False, transform=transforms.Compose([ # transforms.ToPILImage(), transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )) ]), download=True) ood_data = datasets.Omniglot('../data', download=True, transform=transforms.Compose([ transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )), ])) train_loader = torch.utils.data.DataLoader(tr_data, batch_size=args.batch_size, shuffle=False, **kwargs) test_loader = torch.utils.data.DataLoader(te_data, batch_size=args.batch_size, shuffle=False, **kwargs) ood_loader = torch.utils.data.DataLoader(ood_data, batch_size=args.batch_size, shuffle=False, **kwargs) model = mnist_net().to(device) model.load_state_dict(torch.load(args.model_path + 'mcdp-emnist.pt')) test(args, model, device, test_loader) if args.from_approx_model == 0: output_samples = torch.load(args.model_path + 'emnist-mcdp-samples.pt') # --------------- training approx --------- print('approximating ...') fmodel = mnist_net_f().to(device) gmodel = mnist_net_g().to(device) if args.from_approx_model == 0: g_optimizer = optim.SGD(gmodel.parameters(), lr=args.lr, momentum=args.momentum) f_optimizer = optim.SGD(fmodel.parameters(), lr=args.lr, momentum=args.momentum) best_acc = 0 
for epoch in range(1, args.approx_epochs + 1): train_approx(args, fmodel, gmodel, device, train_loader, f_optimizer, g_optimizer, output_samples, epoch) acc = test(args, fmodel, device, test_loader) if acc > best_acc: torch.save(fmodel.state_dict(), args.model_path + 'mcdp-emnist-mean-emd.pt') torch.save(gmodel.state_dict(), args.model_path + 'mcdp-emnist-conc-emd.pt') best_acc = acc else: fmodel.load_state_dict( torch.load(args.model_path + 'mcdp-emnist-mean-emd.pt')) gmodel.load_state_dict( torch.load(args.model_path + 'mcdp-emnist-conc-emd.pt')) print('generating teacher particles for testing&ood data ...') # generate particles for test and ood dataset model.train() if args.test_ood_from_disk == 1: teacher_test_samples = torch.load(args.model_path + 'emnist-mcdp-test-samples.pt') else: with torch.no_grad(): # obtain ensemble outputs all_samples = [] for i in range(500): samples_a_round = [] for data, target in test_loader: data = data.to(device) output = F.softmax(model(data)) samples_a_round.append(output) samples_a_round = torch.cat(samples_a_round).cpu() all_samples.append(samples_a_round) teacher_test_samples = torch.stack(all_samples).permute(1, 0, 2) torch.save(all_samples, args.model_path + 'emnist-mcdp-test-samples.pt') if args.test_ood_from_disk == 1: teacher_ood_samples = torch.load( args.model_path + 'omniglot-mcdp-ood-samples-trd-emnist.pt') else: with torch.no_grad(): # obtain ensemble outputs all_samples = [] for i in range(500): samples_a_round = [] for data, target in ood_loader: data = data.to(device) output = F.softmax(model(data)) samples_a_round.append(output) samples_a_round = torch.cat(samples_a_round).cpu() all_samples.append(samples_a_round) teacher_ood_samples = torch.stack(all_samples).permute(1, 0, 2) torch.save( all_samples, args.model_path + 'omniglot-mcdp-ood-samples-trd-emnist.pt') eval_approx(args, fmodel, gmodel, device, test_loader, ood_loader, teacher_test_samples, teacher_ood_samples)
def get_loader(dataset, path, bsz, cifar_c_type=None): if dataset == 'cifar': mean = (0.4914, 0.4822, 0.4465) std = (0.2023, 0.1944, 0.2010) train_transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) cifar_dataset = datasets.CIFAR10(root=path, train=True, download=True, transform=train_transform) num_train = len(cifar_dataset) indices = torch.randperm(num_train).tolist() valid_size = 2048 train_idx, valid_idx = indices[valid_size:], indices[:valid_size] train_dataset = data.Subset(cifar_dataset, train_idx) valid_dataset = data.Subset(cifar_dataset, valid_idx) train_loader = data.DataLoader(train_dataset, batch_size=bsz, shuffle=True, drop_last=True) valid_loader = data.DataLoader(valid_dataset, batch_size=2000, shuffle=True) return (train_loader, valid_loader) elif dataset == 'cifar_c': mean = (0.4914, 0.4822, 0.4465) std = (0.2023, 0.1944, 0.2010) train_transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) cifar_c = CIFAR10_C(path, c_type=cifar_c_type, transform=train_transform) valid_c_loader = data.DataLoader(cifar_c, batch_size=500, shuffle=False) return valid_c_loader elif dataset == 'mnist': train_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=0.1307, std=0.3081) ]) mnist_dataset = MNIST(root=path, transform=train_transform) num_train = len(mnist_dataset) indices = torch.randperm(num_train).tolist() valid_size = 2048 train_idx, valid_idx = indices[valid_size:], indices[:valid_size] train_dataset = data.Subset(mnist_dataset, train_idx) valid_dataset = data.Subset(mnist_dataset, valid_idx) train_loader = data.DataLoader(train_dataset, batch_size=bsz, shuffle=True, drop_last=True) valid_loader = data.DataLoader(valid_dataset, batch_size=2000, shuffle=True, drop_last=True) return (train_loader, valid_loader) elif dataset == 'mnist_r': rot = [15, 30, 45, 60, 75] rot_loader = [] for i in range(5): rotation = transforms.Compose([ transforms.RandomRotation(degrees=(rot[i], rot[i])), transforms.ToTensor(), transforms.Normalize(mean=0.1307, std=0.3081) ]) rotation_dataset = MNIST(root=path, transform=rotation) rot_loader.append( data.DataLoader(rotation_dataset, batch_size=500, shuffle=False)) return rot_loader elif dataset == 'svhn': mean = (0.4914, 0.4822, 0.4465) std = (0.2023, 0.1944, 0.2010) train_transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) svhn_dataset = SVHN(path, download=True, transform=train_transform) svhn_loader = data.DataLoader(dataset=svhn_dataset, batch_size=500, shuffle=True) return svhn_loader elif dataset == 'lsun': mean = (0.4914, 0.4822, 0.4465) std = (0.2023, 0.1944, 0.2010) train_transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) lsun_dataset = LSUN(path, transform=train_transform) lsun_loader = data.DataLoader(dataset=lsun_dataset, batch_size=500, shuffle=True) return lsun_loader elif dataset == 'tiny': mean = (0.4914, 0.4822, 0.4465) std = (0.2023, 0.1944, 0.2010) train_transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]) tiny_dataset = TINY(path, transform=train_transform) tiny_loader = data.DataLoader(dataset=tiny_dataset, batch_size=500, shuffle=True) return tiny_loader elif dataset == 'fmnist': train_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=0.1307, std=0.3081) ]) fmnist_dataset = FashionMNIST(root=path, train=True, download=True, 
transform=train_transform) fmnist_loader = data.DataLoader(dataset=fmnist_dataset, batch_size=500) return fmnist_loader elif dataset == 'emnist': train_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=0.1307, std=0.3081) ]) emnist_dataset = EMNIST(root=path, train=True, split='letters', download=True, transform=train_transform) emnist_loader = data.DataLoader(dataset=emnist_dataset, batch_size=500) return emnist_loader elif dataset == 'nmnist': train_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=0.1307, std=0.3081) ]) nmnist_dataset = ImageFolder(root=path, transform=train_transform) nmnist_loader = data.DataLoader(dataset=nmnist_dataset, batch_size=500) return nmnist_loader
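# Illustrative use of the get_loader function above for the EMNIST letters branch.
# './data' and the batch size are placeholders, not the values used in the original
# experiments.
emnist_loader = get_loader('emnist', './data', bsz=128)
for images, labels in emnist_loader:
    print(images.shape, labels.shape)  # e.g. torch.Size([500, 1, 28, 28]) torch.Size([500])
    break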
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import EMNIST

from experiments import ROOT_DIR

dataset = EMNIST(os.path.join(ROOT_DIR, 'data', 'EMNIST'), split='letters',
                 train=True, download=True)


class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1)
        self.mp1 = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.mp2 = nn.MaxPool2d(kernel_size=2)

    def forward(self, x):
def main(): # Training settings parser = argparse.ArgumentParser( description='Training MCDP Bayes teacher and sampling') parser.add_argument('--batch-size', type=int, default=256, metavar='N', help='input batch size for training (default: 64)') parser.add_argument('--test-batch-size', type=int, default=100, metavar='N', help='input batch size for testing (default: 1000)') parser.add_argument('--epochs', type=int, default=300, metavar='N', help='number of epochs to train (default: 10)') parser.add_argument('--lr', type=float, default=0.01, metavar='LR', help='learning rate (default: 0.0005)') parser.add_argument('--momentum', type=float, default=0.5, metavar='M', help='SGD momentum (default: 0.5)') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument( '--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') parser.add_argument('--dropout-rate', type=float, default=0.5, metavar='p_drop', help='dropout rate') parser.add_argument('--S', type=int, default=500, metavar='N', help='number of posterior samples') parser.add_argument( '--model-path', type=str, default='../saved_models/emnist_mcdp/', metavar='N', help='number of posterior samples from the Bayesian model') parser.add_argument('--from-model', type=int, default=1, metavar='N', help='if our model is loaded or trained') args = parser.parse_args() use_cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) device = torch.device("cuda" if use_cuda else "cpu") kwargs = {'num_workers': 8, 'pin_memory': True} if use_cuda else {} tr_data = EMNIST('../data', split='balanced', train=True, transform=transforms.Compose([ transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )) ]), download=True) te_data = EMNIST('../data', split='balanced', train=False, transform=transforms.Compose([ transforms.Resize((28, 28)), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )) ]), download=True) train_loader = torch.utils.data.DataLoader(tr_data, batch_size=args.batch_size, shuffle=True, **kwargs) test_loader = torch.utils.data.DataLoader(te_data, batch_size=args.batch_size, shuffle=True, **kwargs) model = mnist_net().to(device) optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) # --------------- train or load teacher ----------- if args.from_model == 1: print('loading teacher model ...') model.load_state_dict(torch.load(args.model_path + 'mcdp-emnist.pt')) else: print('training teacher model ...') schedule = [50, 100, 150, 200, 250] best = 0 for epoch in range(1, args.epochs + 1): if epoch in schedule: for g in optimizer.param_groups: g['lr'] *= 0.5 train_bayesian(args, model, device, train_loader, optimizer, epoch) print("teacher training epoch: {}".format(epoch)) test_acc = test(args, model, device, test_loader) if test_acc > best: torch.save(model.state_dict(), args.model_path + 'mcdp-emnist.pt') best = test_acc train_loader = torch.utils.data.DataLoader(tr_data, batch_size=args.batch_size, shuffle=False, **kwargs) print('generating particles for training data ...') # for an easier training of amortized approximation, # instead of sampling param. during approx, # get particles on simplex and store them first. 
with torch.no_grad(): all_samples = [] for i in range(500): samples_a_round = [] for data, target in train_loader: data = data.to(device) output = F.softmax(model(data)) samples_a_round.append(output) samples_a_round = torch.cat(samples_a_round).cpu() all_samples.append(samples_a_round) all_samples = torch.stack(all_samples).permute(1, 0, 2) torch.save(all_samples, args.model_path + 'emnist-mcdp-samples.pt')
def main(seed=0, n_neurons=100, n_train=60000, n_test=10000, inhib=100, lr=1e-2, lr_decay=1, time=350, dt=1, theta_plus=0.05, theta_decay=1e-7, intensity=1, progress_interval=10, update_interval=250, plot=False, train=True, gpu=False): assert n_train % update_interval == 0 and n_test % update_interval == 0, \ 'No. examples must be divisible by update_interval' params = [ seed, n_neurons, n_train, inhib, lr, lr_decay, time, dt, theta_plus, theta_decay, intensity, progress_interval, update_interval ] test_params = [ seed, n_neurons, n_train, n_test, inhib, lr, lr_decay, time, dt, theta_plus, theta_decay, intensity, progress_interval, update_interval ] model_name = '_'.join([str(x) for x in params]) np.random.seed(seed) if gpu: torch.set_default_tensor_type('torch.cuda.FloatTensor') torch.cuda.manual_seed_all(seed) else: torch.manual_seed(seed) n_examples = n_train if train else n_test n_sqrt = int(np.ceil(np.sqrt(n_neurons))) n_classes = 26 # Build network. if train: network = DiehlAndCook2015v2(n_inpt=784, n_neurons=n_neurons, inh=inhib, dt=dt, norm=78.4, theta_plus=theta_plus, theta_decay=theta_decay, nu=[0, lr]) else: network = load_network(os.path.join(params_path, model_name + '.pt')) network.connections['X', 'Y'].update_rule = NoOp( connection=network.connections['X', 'Y'], nu=network.connections['X', 'Y'].nu) network.layers['Y'].theta_decay = 0 network.layers['Y'].theta_plus = 0 # Load EMNIST data. dataset = EMNIST(root=data_path, split='letters', train=train, download=True) if train: images = dataset.train_data.float() labels = dataset.train_labels.long() else: images = dataset.test_data.float() labels = dataset.test_labels.long() if gpu: images = images.cuda() labels = labels.cuda() permutation = torch.randperm(images.size(0)) images = images[permutation] labels = labels[permutation] images = images.view(-1, 784) images *= intensity labels -= 1 # Record spikes during the simulation. spike_record = torch.zeros(update_interval, time, n_neurons) # Neuron assignments and spike proportions. if train: assignments = -torch.ones_like(torch.Tensor(n_neurons)) proportions = torch.zeros_like(torch.Tensor(n_neurons, n_classes)) rates = torch.zeros_like(torch.Tensor(n_neurons, n_classes)) ngram_scores = {} else: path = os.path.join(params_path, '_'.join(['auxiliary', model_name]) + '.pt') assignments, proportions, rates, ngram_scores = torch.load( open(path, 'rb')) # Sequence of accuracy estimates. curves = {'all': [], 'proportion': [], 'ngram': []} predictions = {scheme: torch.Tensor().long() for scheme in curves.keys()} if train: best_accuracy = 0 spikes = {} for layer in set(network.layers) - {'X'}: spikes[layer] = Monitor(network.layers[layer], state_vars=['s'], time=time) network.add_monitor(spikes[layer], name='%s_spikes' % layer) # Train the network. if train: print('\nBegin training.\n') else: print('\nBegin test.\n') inpt_axes = None inpt_ims = None spike_ims = None spike_axes = None weights_im = None assigns_im = None perf_ax = None start = t() for i in range(n_examples): if i % progress_interval == 0: print(f'Progress: {i} / {n_examples} ({t() - start:.4f} seconds)') start = t() if i % update_interval == 0 and i > 0: if train: network.connections['X', 'Y'].update_rule.nu[1] *= lr_decay if i % len(labels) == 0: current_labels = labels[-update_interval:] else: current_labels = labels[i % len(images) - update_interval:i % len(images)] # Update and print accuracy evaluations. 
curves, preds = update_curves(curves, current_labels, n_classes, spike_record=spike_record, assignments=assignments, proportions=proportions, ngram_scores=ngram_scores, n=2) print_results(curves) for scheme in preds: predictions[scheme] = torch.cat( [predictions[scheme], preds[scheme]], -1) # Save accuracy curves to disk. to_write = ['train'] + params if train else ['test'] + params f = '_'.join([str(x) for x in to_write]) + '.pt' torch.save((curves, update_interval, n_examples), open(os.path.join(curves_path, f), 'wb')) if train: if any([x[-1] > best_accuracy for x in curves.values()]): print( 'New best accuracy! Saving network parameters to disk.' ) # Save network to disk. network.save(os.path.join(params_path, model_name + '.pt')) path = os.path.join( params_path, '_'.join(['auxiliary', model_name]) + '.pt') torch.save((assignments, proportions, rates, ngram_scores), open(path, 'wb')) best_accuracy = max([x[-1] for x in curves.values()]) # Assign labels to excitatory layer neurons. assignments, proportions, rates = assign_labels( spike_record, current_labels, n_classes, rates) # Compute ngram scores. ngram_scores = update_ngram_scores(spike_record, current_labels, n_classes, 2, ngram_scores) print() # Get next input sample. image = images[i % len(images)] sample = poisson(datum=image, time=time, dt=dt) inpts = {'X': sample} # Run the network on the input. network.run(inpts=inpts, time=time) retries = 0 while spikes['Y'].get('s').sum() < 5 and retries < 3: retries += 1 image *= 2 sample = poisson(datum=image, time=time, dt=dt) inpts = {'X': sample} network.run(inpts=inpts, time=time) # Add to spikes recording. spike_record[i % update_interval] = spikes['Y'].get('s').t() # Optionally plot various simulation information. if plot: # _input = image.view(28, 28) # reconstruction = inpts['X'].view(time, 784).sum(0).view(28, 28) _spikes = {layer: spikes[layer].get('s') for layer in spikes} input_exc_weights = network.connections[('X', 'Y')].w square_weights = get_square_weights( input_exc_weights.view(784, n_neurons), n_sqrt, 28) # square_assignments = get_square_assignments(assignments, n_sqrt) # inpt_axes, inpt_ims = plot_input(_input, reconstruction, label=labels[i], axes=inpt_axes, ims=inpt_ims) spike_ims, spike_axes = plot_spikes(_spikes, ims=spike_ims, axes=spike_axes) weights_im = plot_weights(square_weights, im=weights_im) # assigns_im = plot_assignments(square_assignments, im=assigns_im) # perf_ax = plot_performance(curves, ax=perf_ax) plt.pause(1e-8) network.reset_() # Reset state variables. print(f'Progress: {n_examples} / {n_examples} ({t() - start:.4f} seconds)') i += 1 if i % len(labels) == 0: current_labels = labels[-update_interval:] else: current_labels = labels[i % len(images) - update_interval:i % len(images)] # Update and print accuracy evaluations. curves, preds = update_curves(curves, current_labels, n_classes, spike_record=spike_record, assignments=assignments, proportions=proportions, ngram_scores=ngram_scores, n=2) print_results(curves) for scheme in preds: predictions[scheme] = torch.cat([predictions[scheme], preds[scheme]], -1) if train: if any([x[-1] > best_accuracy for x in curves.values()]): print('New best accuracy! Saving network parameters to disk.') # Save network to disk. 
if train: network.save(os.path.join(params_path, model_name + '.pt')) path = os.path.join( params_path, '_'.join(['auxiliary', model_name]) + '.pt') torch.save((assignments, proportions, rates, ngram_scores), open(path, 'wb')) if train: print('\nTraining complete.\n') else: print('\nTest complete.\n') print('Average accuracies:\n') for scheme in curves.keys(): print('\t%s: %.2f' % (scheme, float(np.mean(curves[scheme])))) # Save accuracy curves to disk. to_write = ['train'] + params if train else ['test'] + params f = '_'.join([str(x) for x in to_write]) + '.pt' torch.save((curves, update_interval, n_examples), open(os.path.join(curves_path, f), 'wb')) # Save results to disk. results = [ np.mean(curves['all']), np.mean(curves['proportion']), np.mean(curves['ngram']), np.max(curves['all']), np.max(curves['proportion']), np.max(curves['ngram']) ] to_write = params + results if train else test_params + results to_write = [str(x) for x in to_write] name = 'train.csv' if train else 'test.csv' if not os.path.isfile(os.path.join(results_path, name)): with open(os.path.join(results_path, name), 'w') as f: if train: f.write( 'random_seed,n_neurons,n_train,inhib,lr,lr_decay,time,timestep,theta_plus,theta_decay,intensity,' 'progress_interval,update_interval,mean_all_activity,mean_proportion_weighting,' 'mean_ngram,max_all_activity,max_proportion_weighting,max_ngram\n' ) else: f.write( 'random_seed,n_neurons,n_train,n_test,inhib,lr,lr_decay,time,timestep,theta_plus,theta_decay,' 'intensity,progress_interval,update_interval,mean_all_activity,mean_proportion_weighting,' 'mean_ngram,max_all_activity,max_proportion_weighting,max_ngram\n' ) with open(os.path.join(results_path, name), 'a') as f: f.write(','.join(to_write) + '\n') if labels.numel() > n_examples: labels = labels[:n_examples] else: while labels.numel() < n_examples: if 2 * labels.numel() > n_examples: labels = torch.cat( [labels, labels[:n_examples - labels.numel()]]) else: labels = torch.cat([labels, labels]) # Compute confusion matrices and save them to disk. confusions = {} for scheme in predictions: confusions[scheme] = confusion_matrix(labels, predictions[scheme]) to_write = ['train'] + params if train else ['test'] + test_params f = '_'.join([str(x) for x in to_write]) + '.pt' torch.save(confusions, os.path.join(confusion_path, f))
def select_dataset(dataset_name, input_dim=2, n_samples=10000): """ :params n_samples: number of points returned. If 0, all datapoints will be returned. For artificial data, it will throw an error. """ if dataset_name == 'fmnist': f_mnist = FashionMNIST(root="./datasets", download=True) data = f_mnist.data.numpy() vec_data = np.reshape(data, (data.shape[0], -1)) vec_data = np.float32(vec_data) labels = f_mnist.targets.numpy() elif dataset_name == 'emnist': f_mnist = EMNIST(root="./datasets", download=True, split='byclass') data = f_mnist.data.numpy() vec_data = np.reshape(data, (data.shape[0], -1)) vec_data = np.float32(vec_data) labels = f_mnist.targets.numpy() elif dataset_name == 'kmnist': f_mnist = KMNIST(root="./datasets", download=True) data = f_mnist.data.numpy() vec_data = np.reshape(data, (data.shape[0], -1)) vec_data = np.float32(vec_data) labels = f_mnist.targets.numpy() elif dataset_name == 'usps': f_mnist = USPS(root="./datasets", download=True) data = f_mnist.data vec_data = np.reshape(data, (data.shape[0], -1)) vec_data = np.float32(vec_data) labels = np.float32(f_mnist.targets) elif dataset_name == 'news': newsgroups_train = fetch_20newsgroups(data_home='./datasets', subset='train', remove=('headers', 'footers', 'quotes')) vectorizer = TfidfVectorizer() vec_data = vectorizer.fit_transform(newsgroups_train.data).toarray() vec_data = np.float32(vec_data) labels = newsgroups_train.target labels = np.float32(labels) elif dataset_name == 'cover_type': file_name = file_path + "/datasets/covtype.data" train_data = np.array(pd.read_csv(file_name, sep=',')) vec_data = np.float32(train_data[:, :-1]) labels = np.float32(train_data[:, -1]) elif dataset_name == 'char': digits = datasets.load_digits() n_samples = len(digits.images) data = digits.images.reshape((n_samples, -1)) vec_data = np.float32(data) labels = digits.target elif dataset_name == 'charx': file_name = file_path + "/datasets/char_x.npy" data = np.load(file_name, allow_pickle=True) vec_data, labels = data[0], data[1] elif dataset_name == 'kdd_cup': cover_train = fetch_kddcup99(data_home='./datasets', download_if_missing=True) vec_data = cover_train.data string_labels = cover_train.target vec_data, labels = feature_tranformers.vectorizer_kdd(data=vec_data, labels=string_labels) elif dataset_name == 'aggregation': file_name = file_path + "/2d_data/Aggregation.csv" a = np.array(pd.read_csv(file_name, sep=';')) vec_data = a[:, 0:2] labels = a[:, 2] elif dataset_name == 'compound': file_name = file_path + "/2d_data/Compound.txt" a = np.array(pd.read_csv(file_name, sep='\t')) vec_data = a[:, 0:2] labels = a[:, 2] elif dataset_name == 'd31': file_name = file_path + "/2d_data/D31.txt" a = np.array(pd.read_csv(file_name, sep='\t')) vec_data = a[:, 0:2] labels = a[:, 2] elif dataset_name == 'flame': file_name = file_path + "/2d_data/flame.txt" a = np.array(pd.read_csv(file_name, sep='\t')) vec_data = a[:, 0:2] labels = a[:, 2] elif dataset_name == 'path_based': file_name = file_path + "/2d_data/pathbased.txt" a = np.array(pd.read_csv(file_name, sep='\t')) vec_data = a[:, 0:2] labels = a[:, 2] elif dataset_name == 'r15': file_name = file_path + "/2d_data/R15.txt" a = np.array(pd.read_csv(file_name, sep='\t')) vec_data = a[:, 0:2] labels = a[:, 2] elif dataset_name == 'spiral': file_name = file_path + "/2d_data/spiral.txt" a = np.array(pd.read_csv(file_name, sep='\t')) vec_data = a[:, 0:2] labels = a[:, 2] elif dataset_name == 'birch1': file_name = file_path + "/2d_data/birch1.txt" a = np.array(pd.read_csv(file_name, 
delimiter=r"\s+")) vec_data = a[:, :] labels = np.ones((vec_data.shape[0])) elif dataset_name == 'birch2': file_name = file_path + "/2d_data/birch2.txt" a = np.array(pd.read_csv(file_name, delimiter=r"\s+")) vec_data = a[:, :] labels = np.ones((vec_data.shape[0])) elif dataset_name == 'birch3': file_name = file_path + "/2d_data/birch3.txt" a = np.array(pd.read_csv(file_name, delimiter=r"\s+")) vec_data = a[:, :] labels = np.ones((vec_data.shape[0])) elif dataset_name == 'worms': file_name = file_path + "/2d_data/worms/worms_2d.txt" a = np.array(pd.read_csv(file_name, sep=' ')) vec_data = a[:, :] labels = np.ones((vec_data.shape[0])) elif dataset_name == 't48k': file_name = file_path + "/2d_data/t4.8k.txt" a = np.array(pd.read_csv(file_name, sep=' ')) vec_data = a[1:, :] labels = np.ones((vec_data.shape[0])) elif dataset_name == 'moons': data, labels = make_moons(n_samples=5000) vec_data = np.float32(data) labels = np.float32(labels) elif dataset_name == 'circles': data, labels = make_circles(n_samples=5000) vec_data = np.float32(data) labels = np.float32(labels) elif dataset_name == 'blobs': data, labels = make_blobs(n_samples=n_samples, centers=3) vec_data = np.float32(data) labels = np.float32(labels) elif dataset_name == 'gmm': mean_1 = np.zeros(input_dim) mean_2 = 100 * np.ones(input_dim) cov = np.eye(input_dim) data_1 = np.random.multivariate_normal(mean_1, cov, int(n_samples / 2)) labels_1 = np.ones(int(n_samples / 2)) labels_2 = 2 * np.ones(int(n_samples / 2)) data_2 = np.random.multivariate_normal(mean_2, cov, int(n_samples / 2)) vec_data = np.concatenate([data_1, data_2], axis=0) labels = np.concatenate([labels_1, labels_2], axis=0) elif dataset_name == 'uniform': vec_data = np.random.uniform(0, 1, size=(n_samples, input_dim)) * 10 labels = np.ones(n_samples) elif dataset_name == 'mnist_pc': d_mnist = MNIST(root="./datasets", download=True) mnist = d_mnist.data.numpy() data = np.float32(np.reshape(mnist, (mnist.shape[0], -1))) pca_data = PCA(n_components=input_dim).fit_transform(data) n_indices = np.random.randint(pca_data.shape[0], size=n_samples) vec_data = pca_data[n_indices] labels = d_mnist.targets.numpy()[n_indices] elif dataset_name == 'usps_pc': d_mnist = USPS(root="./datasets", download=True) mnist = d_mnist.data data = np.float32(np.reshape(mnist, (mnist.shape[0], -1))) pca_data = PCA(n_components=input_dim).fit_transform(data) n_indices = np.random.randint(pca_data.shape[0], size=n_samples) vec_data = pca_data[n_indices] labels = np.float32(d_mnist.targets) elif dataset_name == 'char_pc': digits = datasets.load_digits() n_samples = len(digits.images) data = digits.images.reshape((n_samples, -1)) data = np.float32(data) targets = digits.target pca_data = PCA(n_components=input_dim).fit_transform(data) n_indices = np.random.randint(pca_data.shape[0], size=n_samples) vec_data = pca_data[n_indices] labels = targets else: d_mnist = MNIST(root="./datasets", download=True) data = d_mnist.data.numpy() vec_data = np.reshape(data, (data.shape[0], -1)) vec_data = np.float32(vec_data) labels = d_mnist.targets.numpy() if 0 < n_samples < vec_data.shape[0]: rand_indices = np.random.choice(vec_data.shape[0], size=(n_samples,), replace=False) return vec_data[rand_indices], labels[rand_indices] else: return vec_data, labels
def draw_accuracy_plot(train_accuracy, test_accuracy, epochs):
    plt.plot(epochs, train_accuracy, label="train")
    plt.plot(epochs, test_accuracy, label="test")
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.title('training / test accuracy')
    plt.legend()
    plt.show()


if args.dataset == 'MNIST':
    train_data = MNIST('../data/MNIST', train=True, download=True)
    test_data = MNIST('../data/MNIST', train=False, download=True)
if args.dataset == 'EMNIST':
    train_data = EMNIST('../data/EMNIST', split='balanced', train=True, download=True)  # 47 balanced classes
    test_data = EMNIST('../data/EMNIST', split='balanced', train=False, download=True)

# Split the classes into train and test groups according to the
# 'dataset_train_split' hyperparameter.
total_classes = list(dict.fromkeys(train_data.train_labels.numpy()))
train_split = int(len(total_classes) * args.dataset_train_split)
labels_to_combine_train_dataset = random.sample(total_classes, k=train_split)
labels_to_combine_test_dataset = list(
    set(total_classes) - set(labels_to_combine_train_dataset))
random.seed(22)
train_portion = int(len(total_classes) * (1 - args.test_data_portion))
filename = "./src/images/" + str(i) + "_" + str(j-examples +1 ) + ".npy" img = np.load(filename) image_i = 1 + i + columns * j ax = fig.add_subplot(rows + examples, columns, image_i) ax.axis("off") ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(img) plt.subplots_adjust(wspace=0, hspace=0.1) plt.show() if __name__ == '__main__': dataset = EMNIST('./data', train=True, download=True, split="byclass", transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) ])) #viz = NLayerParameterVisualizator(dataset, list(range(2,10))) viz = NLayerParameterVisualizator(dataset, list(range(2, 8))) viz.sample_images() #viz.train() #viz.combine_images()
# Create an instance of the model used for training.
model = MLP()

# -----------------------------------------------------------------------------
# Prepare the training data.
# print('---------- Preparing the training data ----------')
data_folder = '~/data'

transform = transforms.Compose([
    # Convert the data to tensors.
    transforms.ToTensor()
])

# Training data.
train_data_with_labels = EMNIST(
    data_folder, train=True, download=True, transform=transform, split='mnist')
train_data_loader = DataLoader(
    train_data_with_labels, batch_size=BATCH_SIZE, shuffle=True)

# Validation data.
test_data_with_labels = EMNIST(
    data_folder, train=False, download=True, transform=transform, split='mnist')
test_data_loader = DataLoader(
    test_data_with_labels, batch_size=BATCH_SIZE, shuffle=True)

# -----------------------------------------------------------------------------
# Set up the training.
# Use cross-entropy as the loss function.
lossResult = nn.CrossEntropyLoss()

# SGD
def download_emnist() -> None:
    """Download the EMNIST dataset via the PyTorch class."""
    logger.info(f"Data directory is: {DATA_DIRNAME}")
    dataset = EMNIST(root=DATA_DIRNAME, split="byclass", download=True)
    save_emnist_essentials(dataset)
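# save_emnist_essentials is defined elsewhere in this code base. A hypothetical version
# that records the class mapping and image shape to a JSON file could look like this;
# the file name, fields, and the assumption that DATA_DIRNAME is a pathlib.Path are all
# illustrative, not taken from the original project.
import json

ESSENTIALS_FILENAME = DATA_DIRNAME / "emnist_essentials.json"  # hypothetical location

def save_emnist_essentials(dataset) -> None:
    """Persist dataset metadata so it can be reused without re-downloading images."""
    essentials = {
        "classes": dataset.classes,                   # index -> character mapping
        "input_shape": list(dataset.data.shape[1:]),  # e.g. [28, 28]
    }
    with open(ESSENTIALS_FILENAME, "w") as f:
        json.dump(essentials, f)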
# Deep Dictionary configurations
input_dim = 784               # the input dimension expected by the model
dd_layer_config = [784 // 2]  # the layer configuration for the deep dictionary
sparse_cff = 1e-1             # regularization to ensure sparseness in the dictionary representation
epoch_per_level = 15          # the number of epochs to train each layer of the deep dictionary

# MLP configurations
batch_size_train = 500        # the batch size of the MLP model (optimized via Adam)
batch_size_valid = 500
epoch_mlp = 25                # the number of epochs to train the MLP for
num_classes = 47              # the number of classes for classification (10 for MNIST)
mlp_lr = 5e-3                 # the learning rate for the Adam optimizer used on the MLP

# Prepare data loaders.
mnist_train_data = EMNIST('./data/', split='balanced', train=True, download=True,
                          transform=transforms.Compose([transforms.ToTensor()]))
train_data, valid_data = torch.utils.data.random_split(
    mnist_train_data, [90240, 22560], generator=torch.Generator().manual_seed(0))

train_loader_dd = torch.utils.data.DataLoader(train_data, batch_size=len(train_data),
                                              shuffle=False, pin_memory=True)
train_loader_mlp = torch.utils.data.DataLoader(train_data, batch_size=batch_size_train,
                                               shuffle=True, pin_memory=True)
valid_loader_mlp = torch.utils.data.DataLoader(valid_data, batch_size=batch_size_valid,
                                               shuffle=True, pin_memory=True)

test_data = EMNIST('./data/', split='balanced', train=False, download=True,
                   transform=transforms.Compose([transforms.ToTensor()]))
test_loader_mlp = torch.utils.data.DataLoader(test_data, batch_size=len(test_data),
                                              shuffle=True, pin_memory=True)


# Function class
class Identity:
    @staticmethod
    def forward(x):
        return x
def load_emnist(split, root=None, transform=None, target_transform=None, download=True):
    root = root or Path("~/.learner/dataset").expanduser()
    train_ds = EMNIST(root=root, split=split, train=True, download=download,
                      transform=transform, target_transform=target_transform)
    test_ds = EMNIST(root=root, split=split, train=False, download=download,
                     transform=transform, target_transform=target_transform)
    data = Data(train_ds, test_ds=test_ds, auto_split=True)
    return data
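# An illustrative call; the Data wrapper's API is not shown here, so this only
# demonstrates the arguments load_emnist itself accepts.
from torchvision import transforms

data = load_emnist("letters", transform=transforms.ToTensor())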
from torchvision.datasets import EMNIST
from torch.utils.data import TensorDataset

from data.data_helpers import split_dataset, stratified_split_dataset, concat_datasets
import properties as prop
import pwd
import os

DATA_PATH = pwd.getpwuid(os.getuid()).pw_dir + '/time_series_data/eMNIST'


def transform_data(data):
    data = data.unsqueeze(1).float().div(255)
    return data


train_dataset = EMNIST(DATA_PATH, split='letters', train=True, download=True)  # alternatives: letters, balanced
trainX, trainy = transform_data(train_dataset.data), (train_dataset.targets - 1)
train_dataset = TensorDataset(trainX, trainy)

################ test dataset ################################
test_dataset = EMNIST(DATA_PATH, split='letters', train=False, download=True)  # alternatives: letters, balanced
testX, testy = transform_data(test_dataset.data), (test_dataset.targets - 1)
test_dataset = TensorDataset(testX, testy)

full_dataset = concat_datasets(train_dataset, test_dataset)


def get_data_splits():
def main(): ## reproducibility np.random.seed(0) torch.manual_seed(0) torch.cuda.manual_seed_all(0) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False args = _parse_args() dist.init_process_group(backend=args.distributed_backend, init_method=args.distributed_init_method, world_size=args.distributed_world_size, rank=args.distributed_rank) ## data if args.data_name == "CIFAR10": transform = Compose([ ToTensor(), Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616)) ]) train_dataset = CIFAR10(args.data_root, transform=transform) test_dataset = CIFAR10(args.data_root, transform=transform, train=False) num_classes = 10 num_features = 32*32*3 elif args.data_name == "EMNIST": # transform = Compose([ToTensor(), Normalize([0.1732], [0.3317])]) transform = ToTensor() train_dataset = EMNIST(args.data_root, transform=transform, split="digits") test_dataset = EMNIST(args.data_root, transform=transform, split="digits", train=False) num_classes = 10 num_features = 28*28 else: transform = Compose([ToTensor(), Normalize([0.1732], [0.3317])]) train_dataset = MNIST(args.data_root, transform=transform) test_dataset = MNIST(args.data_root, transform=transform, train=False) num_classes = 10 num_features = 28*28 train_sampler = DistributedSampler(train_dataset) test_sampler = DistributedSampler(test_dataset, shuffle=False) train_loader = DataLoader(train_dataset, batch_size=args.batch_size, sampler=train_sampler, num_workers=args.dataloader_num_workers) test_loader = DataLoader(test_dataset, batch_size=args.batch_size, sampler=test_sampler, num_workers=args.dataloader_num_workers) ## model device = torch.device("cuda" if args.use_cuda else "cpu") dtype = torch.float64 if args.use_double_precision else torch.float32 d = num_features if args.model_name == "NLLS" else num_features*num_classes weights = torch.zeros(d, device=device, dtype=dtype) ## run header = ["iter", "ccr", "loss", "grad", "test", "alpha"] print(("{:^16s}"*len(header)).format(*header)) iterations_list = [] communication_rounds_list = [] loss_list = [] grad_norm_list = [] test_val_list = [] step_size_list = [] communication_rounds = 0 iteration = 0 while communication_rounds < args.max_communication_rounds: iterations_list.append(iteration) communication_rounds_list.append(communication_rounds) loss, grad, _ = _obj_fun(args.model_name, train_loader, weights, device, dtype, comp_hess=False, use_regularization=args.use_regularization) loss_list.append(loss) grad_norm_list.append(grad.norm().item()) test_val = _get_test_val(args.model_name, test_loader, weights, device, dtype) test_val_list.append(test_val.item()) update_direction = _get_update_direction(args.model_name, train_loader, weights, device, dtype, grad, args.phi, args.theta, args.use_exact_solve, args.subproblem_tol, args.subproblem_max_iter, args.use_regularization) step_size = _get_step_size(args.model_name, train_loader, weights, device, dtype, loss, grad, update_direction, args.rho, args.use_regularization) step_size_list.append(step_size) weights.add_(update_direction, alpha=step_size) # code can be changed to have 5 or 6 communication rounds per iteration communication_rounds += (5 if iteration == 0 else 6) iteration += 1 print("{:^16g}{:^16g}{:^16.2e}{:^16.2e}{:^16.2e}{:^16.2e}".format( iterations_list[-1], communication_rounds_list[-1], loss_list[-1], grad_norm_list[-1], test_val_list[-1], step_size_list[-1])) if dist.get_rank() == 0: data = zip(iterations_list, communication_rounds_list, loss_list, grad_norm_list, test_val_list, 
step_size_list) np.savetxt('DINO.csv',list(data),delimiter=',',header=",".join(header))