def dataset_split(X, y, perc=VALIDATION_PERCENTAGE, random_state=RANDOM_SEED,
                  return_data='samples'):
    """
    Given two arrays of samples and labels X and y, perform a stratified
    random split into train and validation sets.

    :param X: numpy array of samples
    :param y: numpy array of labels
    :param perc: fraction of the data assigned to the validation set
    :param random_state: random state of the splitter
    :param return_data: 'data_loader' to return DataLoader objects,
        'samples' to return numpy arrays, 'indices' to return index arrays
    :return: (train_loader, val_loader), or (X_train, y_train), (X_val, y_val),
        or (train_idx, val_idx), depending on return_data
    """
    assert 0 <= perc <= 1
    sss = StratifiedShuffleSplit(n_splits=1, test_size=perc,
                                 random_state=random_state)
    train_idxs, valid_idxs = next(sss.split(X, y))
    X_train, X_valid = X[train_idxs], X[valid_idxs]
    y_train, y_valid = y[train_idxs], y[valid_idxs]
    if return_data == 'data_loader':
        return get_data_loader(X_train, y_train), get_data_loader(X_valid, y_valid)
    elif return_data == 'samples':
        return (X_train, y_train), (X_valid, y_valid)
    elif return_data == 'indices':
        return train_idxs, valid_idxs
    else:
        raise ValueError("return_data must be 'data_loader', 'samples' or 'indices'")
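# Minimal usage sketch for dataset_split (the arrays and shapes below are
# illustrative only; X and y can be any numpy arrays of samples and labels).
import numpy as np

X = np.random.rand(100, 28, 28).astype(np.float32)  # 100 fake images
y = np.repeat(np.arange(10), 10)                    # 10 samples per class

# 'samples' returns numpy arrays; 'indices' returns the raw split indices
(X_tr, y_tr), (X_va, y_va) = dataset_split(X, y, perc=0.2, return_data='samples')
train_idx, val_idx = dataset_split(X, y, perc=0.2, return_data='indices')
assert len(X_va) == 20  # stratified: exactly 2 validation samples per class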
def load_dataloaders_from_dataset(dataset):
    (X_train, y_train), (X_test, y_test) = load_dataset(dataset)

    # Scale pixel values
    X_train, X_mean, X_std = image_preprocessing(X_train, scale_only=False)
    X_test, _, _ = image_preprocessing(X_test, seq_mean=X_mean, seq_std=X_std,
                                       scale_only=False)

    # Flatten labels for the dataloader
    y_train = y_train.flatten()
    y_test = y_test.flatten()

    # Stratified split of training and validation
    sss = StratifiedShuffleSplit(n_splits=1, test_size=VALIDATION_PERCENTAGE,
                                 random_state=RANDOM_SEED)
    train_idx, val_idx = next(sss.split(X_train, y_train))
    X_train, X_valid = X_train[train_idx], X_train[val_idx]
    y_train, y_valid = y_train[train_idx], y_train[val_idx]

    # Generate data loaders (no shuffling for validation and test)
    train_dl = get_data_loader(X_train, y_train)
    val_dl = get_data_loader(X_valid, y_valid, shuffle=False)
    test_dl = get_data_loader(X_test, y_test, shuffle=False)
    return train_dl, val_dl, test_dl
def reparametrize_and_compute(dataset_name, break_after=-1):
    """
    Starting from a randomly initialized MLP, compute
    1) approx radius of flatness before & after reparam
    2) local entropy before & after reparam
    """
    model = MnistMLP().to(DEVICE)
    network_params = model.state_dict()
    train_loader = get_data_loader(dataset_name, "train", 100)
    rad0 = np.mean(_compute_c_epsilon_flatness(model, train_loader, network_params,
                                               break_after=break_after))
    entr0 = _compute_local_entropy(model, train_loader, network_params,
                                   break_after=break_after)

    model = MnistMLP().to(DEVICE)
    network_params = model.state_dict()
    train_loader = get_data_loader(dataset_name, "train", 100)
    # Layer-wise rescaling: for ReLU nets this (approximately) preserves the
    # function while moving to a very different point in parameter space
    network_params["fc1.weight"] /= 5.
    network_params["fc2.weight"] *= 5.
    rad1 = np.mean(_compute_c_epsilon_flatness(model, train_loader, network_params,
                                               break_after=break_after))
    entr1 = _compute_local_entropy(model, train_loader, network_params,
                                   break_after=break_after)

    print("Radius of flatness before reparam: {:.3f}, after reparam: {:.3f}".format(rad0, rad1))
    print("Local entropy before reparam: {:.3f}, after reparam: {:.3f}".format(entr0, entr1))
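# The rescaling above exploits the positive homogeneity of ReLU: for a
# bias-free two-layer net, (c * W2) relu(W1 x / c) = W2 relu(W1 x) for any
# c > 0, so the reparametrized network computes the same function. A
# self-contained check (this toy net is a stand-in, not the project's MnistMLP):
import torch
import torch.nn as nn

torch.manual_seed(0)
net = nn.Sequential(nn.Linear(10, 20, bias=False), nn.ReLU(),
                    nn.Linear(20, 5, bias=False))
x = torch.randn(4, 10)
y_before = net(x)

with torch.no_grad():
    net[0].weight /= 5.  # scale the first layer down
    net[2].weight *= 5.  # scale the second layer up by the same factor
y_after = net(x)

# Identical outputs: any change in measured sharpness is a parametrization
# artifact, which is exactly what reparametrize_and_compute probes.
assert torch.allclose(y_before, y_after, atol=1e-5)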
def train(mode=None):
    # mode selects the experiment: "gan", "ae" or "vae"
    if mode == "gan":
        D = get_sym_ful_conv_ae2((4, 8, 16, 24, 32, 48, 64), (4,) * 7, None,
                                 (1, 2, 1, 2, 1, 2, 1), (32, 1), enc_fn=nn.Sigmoid)[0]
        G = get_sym_ful_conv_ae2((4, 8, 16, 24, 32), (4, 4, 4, 4, 4), None,
                                 (1, 2, 1, 2, 1), (256,))[1]
        d, g = D(), G()
        train_gan(d, g, "CelebAGAN2", get_data_loader(CelebA, split="train"),
                  get_data_loader(CelebA, split="test"), 10, 9, Optim5, 0, 1)
    elif mode == "ae":
        for tae in train_CelebA_aes[-1:]:
            tae.train()
            del tae._encoder, tae._decoder
            torch.cuda.empty_cache()
    elif mode == "vae":
        for E, D, name, epochs, data, Optim, loss_type in train_CelebA_vaes[-1:]:
            train_vae(E(), D(), get_data_loader(data), device, name, epochs, 9,
                      Optim, loss_type, get_data_loader(data, split="test"))
def visualize_interpolated_trajectory(exp_names, model_name, dataset_name,
                                      break_after=-1):
    """ Plot the approx train loss along linear interpolations between
    consecutive trajectory checkpoints, one curve per experiment. """
    for exp_name in exp_names:
        trajectory = load_history(exp_name)['trajectory']
        model = get_model(model_name).to(DEVICE)
        train_loader = get_data_loader(dataset_name, "train", 100)
        res = []
        epochs = []
        for i in range(len(trajectory) - 1):
            ps = [trajectory[i], trajectory[i + 1]]
            for alph in [0., 0.2, 0.4, 0.6, 0.8]:
                # alph = 0 gives trajectory[i]; as alph grows the parameters
                # move toward trajectory[i+1], so the x coordinate i + alph
                # advances together with the interpolation
                weights = [1 - alph, alph]
                network_params = average_with_weights(ps, weights)
                model.load_params(network_params)
                loss = compute_approx_train_loss(model, train_loader, break_after)
                epochs.append(i + alph)
                res.append(loss)
        plt.plot(epochs, res)
        plt.xlabel("Interpolated epoch")
        plt.ylabel("Approx loss")
        plt.savefig("loss_interpolation_" + exp_name)
        plt.clf()
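# average_with_weights is defined elsewhere; a plausible minimal
# implementation consistent with the call sites above (hypothetical sketch,
# assuming each trajectory point is a state_dict-like mapping of tensors):
import torch

def average_with_weights(param_dicts, weights):
    """Weighted average of state_dicts with matching keys (sketch)."""
    avg = {}
    for key in param_dicts[0]:
        avg[key] = sum(w * d[key].float()
                       for w, d in zip(weights, param_dicts))
    return avg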
def main(**attrs):
    # Convert window/step durations from minutes to sample counts,
    # apparently assuming one sample every 200 ms (min * 60 * 1000 ms / 200 ms)
    window_min = attrs["window_min"]
    window_size = int(window_min * 60 * 1000 / 200)
    step_min = attrs["step_min"]
    step_size = int(step_min * 60 * 1000 / 200)
    attrs["window_size"] = window_size
    attrs["step_size"] = step_size
    attrs["start"] = 0

    data_paths = attrs["data"].split(',')
    data_paths = [os.path.join(data_dir, x) for x in data_paths]
    attrs["data_paths"] = data_paths
    attrs.pop("window_min", None)
    attrs.pop("step_min", None)
    attrs.pop("data", None)
    print(attrs)

    train_loader, val_loader, _ = get_data_loader(attrs)
    # return train_loader, val_loader, test_loader
    img_size = window_size
    train(
        attrs["cfg"],
        # opt.data_cfg,
        train_loader,
        val_loader,
        img_size=img_size,
        resume=attrs["resume"],
        epochs=attrs["epochs"],
        batch_size=attrs["batch_size"],
        accumulated_batches=attrs["accumulated_batches"],
        weights=attrs["weights"],
        # multi_scale=opt.multi_scale,
        # freeze_backbone=opt.freeze,
        var=attrs["var"],
    )
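# Worked example of the minute-to-sample conversion above, under the
# 200 ms-per-sample reading: a 5-minute window is 5 * 60 * 1000 = 300,000 ms,
# i.e. 300,000 / 200 = 1,500 samples.
window_min = 5
window_size = int(window_min * 60 * 1000 / 200)
assert window_size == 1500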
def reparam_and_local_entropy(exp_name, model_name, dataset_name, gamma=100,
                              n_trials=10, break_after=-1):
    model = get_model(model_name).to(DEVICE)
    network_params = model.state_dict()
    # Function-preserving rescaling of two consecutive layers
    network_params["features.0.weight"] /= 5.
    network_params["features.3.weight"] *= 5.
    train_loader = get_data_loader(dataset_name, "train", 100)
    entr = _compute_local_entropy(model, train_loader, network_params,
                                  gamma, n_trials, break_after=break_after)
    return entr
def reparam_and_c_eps_flat(exp_name, model_name, dataset_name, gamma=100,
                           n_trials=10, break_after=-1):
    model = get_model(model_name).to(DEVICE)
    network_params = model.state_dict()
    # Function-preserving rescaling of two consecutive layers
    network_params["features.0.weight"] /= 5.
    network_params["features.3.weight"] *= 5.
    train_loader = get_data_loader(dataset_name, "train", 100)
    rad = np.mean(_compute_c_epsilon_flatness(model, train_loader, network_params,
                                              n_trials=n_trials,
                                              break_after=break_after))
    return rad
def tests():
    a = (MNISTEncoder4, MNISTDecoder4, "AE4")
    b = (MNISTEncoder4, MNISTDecoder4, "FashionAE4")
    dl_a = get_data_loader("MNIST")
    dl_b = get_data_loader("FashionMNIST")
    encoder_a, decoder_a = load_model(*a)
    encoder_b, decoder_b = load_model(*b)

    mean_a, cov_a = latent_space_pca(encoder_a, dl_a)
    mean_b, cov_b = latent_space_pca(encoder_b, dl_b)  # was dl_a, presumably a typo
    ls = encoder_a.latent_size

    plot_images2(decoder_a(sample_in_pc(9, mean_a, cov_a)).detach())
    plot_images2(
        decoder_a(normal_to_pc(get_sample_k_of_d(9, 4, ls) * 3, mean_a, cov_a)).detach())
    plot_images2(
        decoder_a(normal_to_pc(torch.eye(ls) * 3, mean_a, cov_a)).detach())

    # Cross-reconstructions: every encoder/decoder pairing on one batch
    # from each dataset
    batch_a = next(iter(dl_a))[0]
    batch_b = next(iter(dl_b))[0]
    plot_images2(decoder_a(encoder_a(batch_a)).detach())
    plot_images2(decoder_a(encoder_a(batch_b)).detach())
    plot_images2(decoder_a(encoder_b(batch_a)).detach())
    plot_images2(decoder_a(encoder_b(batch_b)).detach())
    plot_images2(decoder_b(encoder_a(batch_a)).detach())
    plot_images2(decoder_b(encoder_a(batch_b)).detach())
    plot_images2(decoder_b(encoder_b(batch_a)).detach())
    plot_images2(decoder_b(encoder_b(batch_b)).detach())

    x_a = labeled_latent_space_pca(encoder_a, dl_a)
    plot_images2(decoder_a(sample_in_pc(9, *x_a[7])).detach())
    plot_images2(
        decoder_a(
            torch.stack([mean for label, (mean, cov) in x_a.items()]).view(-1, ls)).detach())

    x_b = labeled_latent_space_pca(encoder_b, dl_b)
    plot_images2(decoder_b(sample_in_pc(9, *x_b[9])).detach())
    plot_images2(
        decoder_b(
            torch.stack([mean for label, (mean, cov) in x_b.items()]).view(-1, ls)).detach())
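# latent_space_pca is defined elsewhere; a plausible reading of the calls
# above is that it returns the mean and covariance of the encoder's latent
# codes over a loader (hypothetical sketch; the real helper may also return
# principal components explicitly):
import torch

def latent_space_pca(encoder, data_loader):
    codes = []
    with torch.no_grad():
        for batch, *_ in data_loader:
            codes.append(encoder(batch))
    z = torch.cat(codes)        # (n_samples, latent_size)
    mean = z.mean(dim=0)
    cov = torch.cov(z.T)        # (latent_size, latent_size)
    return mean, cov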
def train_model(model_name, dataset, batch_size, lr, n_epochs, check_name,
                model_dir, **kwargs):
    if check_name == "default":
        check_name = f"{model_name}_{dataset}"
    model = get_model(model_name)
    train_dataloader = get_data_loader(dataset, True, batch_size=batch_size)
    val_dataloader = get_data_loader(dataset, False, batch_size=batch_size)

    # Stuff that could be adjusted
    opt = Adam(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(opt, patience=3, threshold=0.1, min_lr=1e-5)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(n_epochs):
        loss = train_epoch(model, train_dataloader, criterion, opt, scheduler)
        print("Finished epoch {}, avg loss {:.3f}".format(epoch + 1, loss))
        val_loss, acc = validate(model, val_dataloader, criterion, scheduler)
        print("Validation loss: {}, accuracy: {}".format(val_loss, acc))

    model.save(check_name, model_dir)
    print("Model {} has been saved to {}".format(model_name, model_dir))
def evaluate_sparsifier(model_name, dataset, check_name, model_dir, sparse,
                        method, variance_based=False, **kwargs):
    """
    Run a single sparsification eval and return the result.
    TODO: add a custom sparsification caller.
    """
    if check_name == "default":
        check_name = f"{model_name}_{dataset}"
    model = get_model(model_name)
    model.load(check_name, model_dir)
    train_data = get_dataset(dataset, is_train=True)
    val_data = get_dataset(dataset, is_train=False)
    val_loader = get_data_loader(dataset, is_train=False)

    # Two cases to account for the different nnz parameter computations
    if method == "corenet":
        sparse_model = sparsify_corenet(model, train_data, s_sparse=sparse)
        pre_nnz = model.count_nnz()
        post_nnz = sparse_model.count_nnz()
    elif method == "svd":
        sparse_model = sparsify_svd(model, sparse, variance_based)
        pre_nnz = compute_nnz_svd(model)
        post_nnz = compute_nnz_svd(sparse_model)
    else:
        raise ValueError(f"Method {method} not available")

    max_dev = evaluate_coverage(model, sparse_model, val_data, 0.5)
    pre_acc = evaluate_val_acc(model, val_loader)
    post_acc = evaluate_val_acc(sparse_model, val_loader)
    res = {
        'sparsification': {'pre_nnz': pre_nnz, 'post_nnz': post_nnz},
        'accuracy': {'pre_acc': pre_acc, 'post_acc': post_acc},
        'coverage': max_dev
    }
    return res
def run(beta=10, seed=1234):
    save_dir = os.path.join(SAVE_DIR, DATASET_NAME)
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    torch.manual_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_loader, ds = get_data_loader(DATASET_NAME, BATCH_SIZE, N_STEPS)
    model = BetaTCVAE(beta, IMG_CHANNELS, N_LATENTS, ds.size).to(device)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE,
                           betas=(ADAM_BETA1, ADAM_BETA2))
    ckpt_paths = train(train_loader, model, optimizer, device, save_dir)

    visual_dir = os.path.join(OUTPUT_DIR, DATASET_NAME, 'visual')
    if os.path.exists(visual_dir):
        shutil.rmtree(visual_dir)
    os.makedirs(visual_dir)
    for path in ckpt_paths:
        eval_loader, _ = get_data_loader(DATASET_NAME, 1, 100)
        eval_visual(eval_loader, model, device, path, visual_dir)
def compute_spectral_sharpness(exp_name, model_name, dataset_name):
    """ Compute the top Hessian eigenvalue of the loss on a single
    training batch (spectral sharpness). """
    model = get_model(model_name).to(DEVICE)
    network_params = load_history(exp_name)['trajectory'][-1]
    model.load_params(network_params)
    train_loader = get_data_loader(dataset_name, "train", 100)
    inputs, targets = next(iter(train_loader))
    eigenvalue, eigenvector = get_eigen(model, inputs, targets,
                                        nn.CrossEntropyLoss(),
                                        maxIter=10, tol=1e-2)
    return eigenvalue
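# get_eigen is defined elsewhere; the standard implementation of this kind of
# helper is power iteration with Hessian-vector products via double backprop
# (hedged sketch, not necessarily the project's exact routine):
import torch

def get_eigen(model, inputs, targets, criterion, maxIter=10, tol=1e-2):
    loss = criterion(model(inputs), targets)
    params = [p for p in model.parameters() if p.requires_grad]
    grads = torch.autograd.grad(loss, params, create_graph=True)

    v = [torch.randn_like(p) for p in params]
    eigenvalue = None
    for _ in range(maxIter):
        norm = torch.sqrt(sum((u * u).sum() for u in v))
        v = [u / norm for u in v]
        # Hessian-vector product: differentiate <grads, v> w.r.t. params
        Hv = torch.autograd.grad(grads, params, grad_outputs=v,
                                 retain_graph=True)
        new_eig = sum((h * u).sum() for h, u in zip(Hv, v)).item()
        if eigenvalue is not None and abs(new_eig - eigenvalue) < tol * abs(new_eig):
            eigenvalue = new_eig
            break
        eigenvalue = new_eig
        v = list(Hv)
    return eigenvalue, v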
def main(args):
    device = args.device if torch.cuda.is_available() else 'cpu'
    checkpoint = os.path.join(args.root, 'checkpoint.pt')
    with open(os.path.join(args.root, 'config.json')) as inp:
        config = json.load(inp)
    args.root = os.path.join(args.root, 'det_checkpoints')
    os.makedirs(args.root, exist_ok=True)

    model = get_model_from_config(config)
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    model.to(device)
    det_config = config.copy()
    det_config['model_name'] = f"Det{model.__class__.__name__[3:]}"
    det_model = get_model_from_config(det_config)
    test_loader = get_data_loader(config['dataset'], args.batch_size, test_only=True)
    ece = ECELoss(args.ece_bins)

    results = []
    predictions = []
    for index in ['ones', 'mean'] + list(range(config['n_components'])):
        det_model = StoLayer.convert_deterministic(model, index, det_model)
        torch.save(det_model.state_dict(),
                   os.path.join(args.root, f'checkpoint_{index}.pt'))
        y_prob, y_true, acc, tnll, nll_miss = test_model_deterministic(
            det_model, test_loader, device)
        pred_entropy = entropy(y_prob, axis=1)
        ece_val = ece(torch.from_numpy(y_prob), torch.from_numpy(y_true)).item()
        predictions.append(y_prob)
        result = {
            'checkpoint': index,
            'nll': float(tnll),
            'nll_miss': float(nll_miss),
            'ece': float(ece_val),
            'predictive_entropy': {
                'mean': float(pred_entropy.mean()),
                'std': float(pred_entropy.std())
            },
            **{f"top-{k}": float(a) for k, a in enumerate(acc, 1)}
        }
        results.append(result)
    results = pd.DataFrame(results)
    results.to_csv(os.path.join(args.root, 'results.csv'), index=False)
    np.save(os.path.join(args.root, 'preds.npy'), np.array(predictions))
def compute_c_epsilon_flatness(exp_name, model_name, dataset_name, eps=0.05,
                               n_trials=100, break_after=-1):
    """
    Input: experiment name and parameter epsilon
    Returns: float flatness
    """
    model = get_model(model_name).to(DEVICE)
    # Batch size here is chosen for processing speed
    train_loader = get_data_loader(dataset_name, "train", 100)
    network_params = load_history(exp_name)['trajectory'][-1]
    # Call helper
    steps_to_border = _compute_c_epsilon_flatness(model, train_loader,
                                                  network_params, eps,
                                                  n_trials, break_after)
    # Display results
    print("Approximate radius of flatness: {:.3f} +/- {:.3f}".format(
        np.mean(steps_to_border), np.std(steps_to_border)))
    return np.mean(steps_to_border)
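# _compute_c_epsilon_flatness is defined elsewhere; a plausible reading of
# its interface (sample random unit directions in weight space and walk until
# the approximate train loss rises by eps, recording the distance reached) is
# sketched below. The step size and stopping rule are assumptions.
import numpy as np
import torch

def _compute_c_epsilon_flatness(model, train_loader, network_params, eps=0.05,
                                n_trials=100, break_after=-1, step=0.01):
    model.load_params(network_params)
    base_loss = compute_approx_train_loss(model, train_loader, break_after)
    float_keys = [k for k, v in network_params.items() if v.is_floating_point()]
    radii = []
    for _ in range(n_trials):
        direction = {k: torch.randn_like(network_params[k]) for k in float_keys}
        norm = torch.sqrt(sum((d ** 2).sum() for d in direction.values()))
        n_steps, loss = 0, base_loss
        while loss <= base_loss + eps:
            n_steps += 1
            perturbed = {k: (v + n_steps * step * direction[k] / norm
                             if k in direction else v)
                         for k, v in network_params.items()}
            model.load_params(perturbed)
            loss = compute_approx_train_loss(model, train_loader, break_after)
        radii.append(n_steps * step)
    return np.array(radii)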
def get_dataloader(batch_size, validation, validation_fraction, seed, dataset):
    return get_data_loader(dataset, batch_size, validation,
                           validation_fraction, seed)
(X_train_noisy, y_train), (X_valid_noisy, y_valid) = dataset_split(
    X_train_noisy, y_train, return_data='samples')

# Image pre-processing: scale pixel values
X_train_noisy_sc, X_mean, X_std = image_preprocessing(X_train_noisy,
                                                      scale_only=False)
X_valid_noisy_sc, _, _ = image_preprocessing(X_valid_noisy, seq_mean=X_mean,
                                             seq_std=X_std, scale_only=False)
X_test_noisy_sc, _, _ = image_preprocessing(X_test_noisy, seq_mean=X_mean,
                                            seq_std=X_std, scale_only=False)

# Dataloaders
train_noisy_dl = get_data_loader(X_train_noisy_sc, y_train, shuffle=True)
valid_noisy_dl = get_data_loader(X_valid_noisy_sc, y_valid, shuffle=True)
test_noisy_dl = get_data_loader(X_test_noisy_sc, y_test, shuffle=True)

# Writer
writer = SummaryWriter('runs/' + '{}_{}_fine_tuning'.format(baseline, dataset))

# Fine-tuning
print('Fine-tuning...')
model_finetune = copy.deepcopy(model_clean)
n_classes = len(np.unique(y_train))
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_finetune.parameters())
train_losses, train_accuracies, val_accuracies, val_losses, _, _ = train(
    model_finetune,
def compute_local_entropy(exp_name, model_name, dataset_name, gamma=100.,
                          n_trials=100, break_after=-1):
    model = get_model(model_name).to(DEVICE)
    network_params = load_history(exp_name)['trajectory'][-1]
    train_loader = get_data_loader(dataset_name, "train", 100)
    return _compute_local_entropy(model, train_loader, network_params,
                                  gamma, n_trials, break_after=break_after)
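# _compute_local_entropy is defined elsewhere; a plausible Monte Carlo
# estimator, following the local-entropy objective of Chaudhari et al.
# (Entropy-SGD), is log E_xi[exp(-L(w + xi))] with xi ~ N(0, I / gamma).
# Hedged sketch only; the project's helper may use a different estimator.
import math
import torch

def _compute_local_entropy(model, train_loader, network_params, gamma=100.,
                           n_trials=100, break_after=-1):
    sigma = 1.0 / math.sqrt(gamma)
    neg_losses = []
    for _ in range(n_trials):
        perturbed = {k: (v + sigma * torch.randn_like(v)
                         if v.is_floating_point() else v)
                     for k, v in network_params.items()}
        model.load_params(perturbed)
        loss = compute_approx_train_loss(model, train_loader, break_after)
        neg_losses.append(-loss)
    # log-mean-exp for numerical stability
    return (torch.logsumexp(torch.tensor(neg_losses), dim=0)
            - math.log(n_trials)).item()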
def main(args):
    # Set device: use CUDA only if requested and available
    if args.cuda and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    dataset_name = args.dataset.lower()
    if 'mnist' not in dataset_name and 'cifar' not in dataset_name:
        raise Exception('{} dataset not yet supported'.format(dataset_name))
    dset = get_cifar10_dataset if 'cifar' in dataset_name else get_mnist_dataset

    # Get the right dataset
    train, test = dset()
    dataloader = get_data_loader(train, test, batch_size=args.batch_size)
    trainloader, testloader = dataloader['train'], dataloader['test']
    batch, labels = next(iter(trainloader))

    # Convert data to tensors
    batch_var = Variable(batch).to(device)
    labels_var = Variable(one_hotify(labels).to(device))

    # Get the right model
    model_name = args.model.lower()
    model_dict = {
        'resnet18': resnet.resnet18(num_classes=10, version=2),
        'resnet34': resnet.resnet34(num_classes=10, version=2),
        'resnet50': resnet.resnet50(num_classes=10, version=2),
        'resnet101': resnet.resnet101(num_classes=10, version=2),
        'resnet152': resnet.resnet152(num_classes=10, version=2),
        'capsnet': None
    }
    if model_name not in model_dict:
        raise Exception(
            '{} not implemented, try main.py --help for additional information'
            .format(model_name))
    model = model_dict[model_name]

    # Temporary
    if not model:
        raise Exception('CapsNet in progress')

    model.to(device)  # model graph is placed

    base_loss = nn.CrossEntropyLoss()
    base_optimizer = optim.SGD(model.parameters(), lr=args.lr)
    base_trainer = Trainer(model, base_optimizer, base_loss, trainloader,
                           testloader, use_cuda=args.cuda)

    # Load a model-specific checkpoint if one exists, otherwise the default
    ckpt_name = os.path.join(
        args.ckpt_dir, '{}_{}'.format(model_name, dataset_name) + '.pth.tar')
    fname = os.path.join('checkpoints',
                         '{}_{}'.format('resnet18_v2', 'cifar10') + '.pth.tar')
    if os.path.isfile(ckpt_name):
        base_trainer.load_checkpoint(ckpt_name)
    else:
        base_trainer.load_checkpoint(fname)
    # base_trainer.run(epochs=1)
    # base_trainer.save_checkpoint(ckpt_name)

    net = Solver(args, model, dataloader)
    net.generate(num_sample=args.batch_size, target=args.target,
                 epsilon=args.epsilon, alpha=args.alpha,
                 iteration=args.iteration)
def visualize_checkpoint_simplex(exp_names, model_name, dataset_name, cutoff=3.5,
                                 mode="grid", break_after=-1):
    """ Given three points, plot the loss surface over their convex
    combinations. """
    ps = []
    for exp_name in exp_names:
        last_trajectory_point = load_history(exp_name)['trajectory'][-1]
        ps.append(last_trajectory_point)
    model = get_model(model_name).to(DEVICE)
    train_loader = get_data_loader(dataset_name, "train", 100)

    # TODO: use meshgrid instead
    if mode == "triangle":
        x_simplex, y_simplex, losses = [], [], []
        for simplex_sample in generate_simplex_combs(ps, 3):
            network_params, simplex_point = simplex_sample
            model.load_params(network_params)
            loss = compute_approx_train_loss(model, train_loader)
            x_simplex.append(simplex_point[0])
            y_simplex.append(simplex_point[1])
            losses.append(loss)
        tri = mtri.Triangulation(x_simplex, y_simplex)
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_trisurf(x_simplex, y_simplex, losses, triangles=tri.triangles,
                        cmap=plt.cm.Spectral, label="loss surface interpolation")
        plt.savefig("surf_" + "_".join(s for s in exp_names))
    else:
        # Grid mode: barycentric weights (X, Y, 1 - X - Y) over a 2D meshgrid
        x = np.linspace(-0.4, 1.3, 50)
        y = np.linspace(-0.4, 1.3, 50)
        X, Y = np.meshgrid(x, y)
        Z = 1 - X - Y
        Z_ = []
        for i in tqdm(range(X.shape[0])):
            Z_ += [[]]
            for j in range(Y.shape[0]):
                weights = [X[i, j], Y[i, j], Z[i, j]]
                network_params = average_with_weights(ps, weights)
                model.load_params(network_params)
                loss = compute_approx_train_loss(model, train_loader, break_after)
                Z_[i].append(loss)
        losses = np.array(Z_)

        # Back up everything
        np.save("./data/X_" + "_".join(s for s in exp_names), X)
        np.save("./data/Y_" + "_".join(s for s in exp_names), Y)
        np.save("./data/Z_" + "_".join(s for s in exp_names), Z_)

        losses[losses > cutoff] = cutoff
        fig = plt.figure()
        cmap = matplotlib.cm.coolwarm
        cmap.set_bad('white', 1.)
        cmap.set_over('white', alpha=.1)
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(X, Y, losses, vmax=cutoff, rstride=1, cstride=1,
                        cmap=cmap, edgecolor='none', antialiased=True)
        ax.view_init(50, 200)  # 225
        plt.savefig("surf_" + "_".join(s for s in exp_names))
        plt.clf()
parser.add_argument('--in_channels', '-i', type=int, default=3)
parser.add_argument('--classes', '-c', type=int, default=10)
parser.add_argument('--batch_size', '-b', type=int, default=64)
parser.add_argument('--ece_bins', type=int, default=15)
parser.add_argument('--dropout', action='store_true')
args = parser.parse_args()

device = args.device if torch.cuda.is_available() else 'cpu'
num_sample = args.num_samples
checkpoint = os.path.join(args.root, 'checkpoint.pt')
with open(os.path.join(args.root, 'config.json')) as inp:
    config = json.load(inp)
args.root = os.path.join(args.root, config['dataset'])
os.makedirs(args.root, exist_ok=True)
text_path = os.path.join(args.root,
                         f'{"dropout_" if args.dropout else ""}result.json')
test_loader = get_data_loader(config['dataset'], args.batch_size, test_only=True)
model = get_model_from_config(config)
model.load_state_dict(torch.load(checkpoint, map_location=device))
model.to(device)

# Dispatch on the model family encoded in the class-name prefix
if model.__class__.__name__.startswith('Det'):
    if args.dropout:
        y_prob_all, y_prob, y_true, acc, tnll, nll_miss = test_dropout(
            model, test_loader, device, args.num_samples)
    else:
        y_prob, y_true, acc, tnll, nll_miss = test_model_deterministic(
            model, test_loader, device)
elif model.__class__.__name__.startswith('Sto'):
    y_prob_all, y_prob, y_true, acc, tnll, nll_miss = test_stochastic(
        model, test_loader, device, args.num_samples)
elif model.__class__.__name__.startswith('Bayesian'):
    y_prob_all, y_prob, y_true, acc, tnll, nll_miss = test_bayesian(
        model, test_loader, device, args.num_samples)
else:
    raise ValueError(f"Unrecognized model class {model.__class__.__name__}")

pred_entropy = entropy(y_prob, axis=1)
np.save(os.path.join(args.root,
                     f'{"dropout_" if args.dropout else ""}predictions.npy'),
        y_prob)
ece = ECELoss(args.ece_bins)
def train_loader(self):
    # Lazily build and cache the loader on first access
    if self._train_loader is None:
        self._train_loader = get_data_loader(self._dataset_name, self.label,
                                             split="train")
    return self._train_loader
def val_loader(self):
    # Lazily build and cache the loader on first access
    if self._val_loader is None:
        self._val_loader = get_data_loader(self._dataset_name, self.label,
                                           split="valid")
    return self._val_loader
def run_training(model_name="vgg16", dataset_name="cifar10", batch_size=32,
                 lr=1e-3, n_epochs=10, save_hist_period=1, verbose=False):
    """
    For now only one model (vgg-16).
    Params:
    :model_name: "vgg{11,13,16,19}" or "lenet" (or "[...]_random")
    :dataset_name: "cifar10" or "mnist"
    :batch_size: int
    :lr: float
    :n_epochs: number of training epochs
    :save_hist_period: frequency with which points are saved
    """
    # Name of current checkpoint/run
    check_name = record_experiment(model_name, dataset_name, batch_size, lr)

    # Set up model, optimizer and logging
    model = get_model(model_name).to(DEVICE)
    optimizer = SGD(params=model.parameters(), lr=lr)
    # scheduler = ReduceLROnPlateau(optimizer, patience=3,
    #                               threshold=0.1, min_lr=1e-5)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
    cross_ent = nn.CrossEntropyLoss()

    # Load data
    train_loader = get_data_loader(dataset_name, "train", batch_size)
    val_loader = get_data_loader(dataset_name, "val", batch_size)

    history = init_history()
    update_history({"train_loss": float("inf"), "val_acc": 0.,
                    "weights": deepcopy(model.state_dict())},
                   history, check_name)

    for epoch in range(n_epochs):
        model.train()
        if verbose:
            print("Starting training epoch {}".format(epoch + 1))
        running_loss = 0.
        num_batches = len(train_loader)
        for (xs, ys) in train_loader:
            xs, ys = xs.to(DEVICE), ys.to(DEVICE)
            optimizer.zero_grad()
            logits = model(xs)
            loss = cross_ent(logits, ys)
            loss.backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)
            optimizer.step()
            running_loss += loss.item()
            if np.isnan(running_loss):
                print("Loss is nan")
                exit(0)
        avg_loss = running_loss / num_batches
        # StepLR steps on epochs, not on a metric
        scheduler.step()
        model.save(check_name)
        if verbose:
            print("Epoch {} loss: {:.3f}".format(epoch + 1, avg_loss))

        if epoch % save_hist_period == 0:
            model.eval()
            accs = []
            with torch.no_grad():
                for (xs, ys) in val_loader:
                    xs, ys = xs.to(DEVICE), ys.to(DEVICE)
                    logits = model(xs)
                    y_pred = logits.argmax(dim=1)
                    batch_acc = (y_pred == ys).float().mean().item()
                    accs.append(batch_acc)
            if verbose:
                print("Validation accuracy: {:.3f}".format(np.mean(accs)))
            update_history({"train_loss": avg_loss, "val_acc": np.mean(accs),
                            "weights": deepcopy(model.state_dict())},
                           history, check_name)

    print("Last avg loss {}, eval acc {}".format(avg_loss, np.mean(accs)))
def train_baseline(dataset, model_name, noisy=False, distortion_type='AWGN',
                   distortion_amount=25, flatten=False, verbose=True,
                   classes=None):
    """
    Train a baseline model on a specific dataset.

    :param dataset: dataset to train the model on
    :param model_name: name of the neural network model
    :param noisy: if True, train the model on distorted data
    :param distortion_type: either 'blur' or 'AWGN'
    :param distortion_amount: severity of the distortion
    :param flatten: flatten the input image to use it in a FF network model
    :param verbose: add verbosity
    :return:
    """
    assert dataset in DSETS

    # Set seeds
    torch.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed_all(RANDOM_SEED)

    # Set model path
    model_path = os.path.join('baselines', model_name + '.pt')

    # Train baseline on noisy data
    # TODO: this branch does not yet select a baseline_net architecture
    if noisy:
        noisy_dir = os.path.join(ROOT_DIR, DATA_DIR, dataset,
                                 distortion_type + '-' + str(distortion_amount))
        X_train = np.load(os.path.join(noisy_dir, 'X_train_noisy.npy'))
        y_train = np.load(os.path.join(noisy_dir, 'y_train.npy'))
        X_test = np.load(os.path.join(noisy_dir, 'X_test_noisy.npy'))
        y_test = np.load(os.path.join(noisy_dir, 'y_test.npy'))
    # Train baseline on clean data
    else:
        if dataset == 'CIFAR_10':
            (X_train, y_train), (X_test, y_test) = load_CIFAR10()
            if classes is not None:
                X_train, y_train = select_classes(X_train, y_train, classes,
                                                  convert_labels=True)
                X_test, y_test = select_classes(X_test, y_test, classes,
                                                convert_labels=True)
            if len(np.unique(y_train)) > 2:
                baseline_net = SimpleBaselineNet(output_dim=len(classes))
            else:
                if model_name == 'SimpleBaselineBinaryNetTanh':
                    y_train = convert_labels(y_train, [0, 1], [-1, 1])
                    y_test = convert_labels(y_test, [0, 1], [-1, 1])
                    baseline_net = SimpleBaselineBinaryNet(activation='tanh')
                elif model_name == 'SimpleBaselineBinaryNet':
                    baseline_net = SimpleBaselineBinaryNet(activation='sigmoid')
                elif model_name == 'SimplerBaselineBinaryNetTanh':
                    y_train = convert_labels(y_train, [0, 1], [-1, 1])
                    y_test = convert_labels(y_test, [0, 1], [-1, 1])
                    baseline_net = SimpleBaselineBinaryNet(activation='tanh',
                                                           num_conv=32, num_ff=32)
        elif dataset == 'CIFAR_100':
            (X_train, y_train), (X_test, y_test) = load_CIFAR100()
            if model_name == 'SqueezeNetBaseline':
                baseline_net = squeezenet()
            else:
                baseline_net = ACNBaselineNet()
        elif dataset == 'MNIST':
            (X_train, y_train), (X_test, y_test) = load_MNIST()
            if classes is not None:
                X_train, y_train = select_classes(X_train, y_train, classes,
                                                  convert_labels=True)
                X_test, y_test = select_classes(X_test, y_test, classes,
                                                convert_labels=True)
            if len(np.unique(y_train)) > 2:
                baseline_net = FFSimpleNet()
            else:
                baseline_net = FFBinaryNet()
            flatten = True
        elif dataset == 'USPS':
            (X_train, y_train), (X_test, y_test) = load_USPS(resize=(28, 28))
            baseline_net = FFSimpleNet()
            flatten = True
        else:
            raise RuntimeError(
                "Dataset not in the predefined list: {}".format(DSETS))

    # Scale pixel values
    X_train, X_mean, X_std = image_preprocessing(X_train, scale_only=False)
    X_test, _, _ = image_preprocessing(X_test, seq_mean=X_mean, seq_std=X_std,
                                       scale_only=False)

    # Flatten labels for the dataloader
    y_train = y_train.flatten()
    y_test = y_test.flatten()

    # Stratified split of training and validation
    sss = StratifiedShuffleSplit(n_splits=1, test_size=VALIDATION_PERCENTAGE,
                                 random_state=RANDOM_SEED)
    train_idx, val_idx = next(sss.split(X_train, y_train))
    X_train, X_valid = X_train[train_idx], X_train[val_idx]
    y_train, y_valid = y_train[train_idx], y_train[val_idx]

    # Generate data loaders
    train_loader_clean = get_data_loader(X_train, y_train)
    val_loader_clean = get_data_loader(X_valid, y_valid, shuffle=False)
    test_loader_clean = get_data_loader(X_test, y_test, shuffle=False)

    # Logger
    if noisy:
        writer = SummaryWriter('runs/' + dataset + '_baseline_noisy')
    else:
        writer = SummaryWriter('runs/' + dataset + '_baseline_clean')

    # Optimizer and criterion
    optimizer = torch.optim.Adam(baseline_net.parameters())
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 60, gamma=0.02,
    #                                             last_epoch=-1)
    if len(np.unique(y_train)) > 2:
        criterion = nn.CrossEntropyLoss()
    elif len(np.unique(y_train)) == 2 and -1 in np.unique(y_train):
        criterion, _ = init_loss(X_train, loss='exp')
    else:
        criterion = nn.BCELoss()
    baseline_net.to(device)

    # Training and evaluation
    if verbose:
        print('Starting {} baseline training on {}'.format(
            baseline_net.__class__.__name__, dataset))
    train(model=baseline_net, train_loader=train_loader_clean,
          val_loader=val_loader_clean, test_loader=test_loader_clean,
          optimizer=optimizer, criterion=criterion, device=device,
          model_path=model_path, writer=writer, save_model=True,
          scheduler=None, flatten=flatten, early_stopping=True)

    acc = evaluate(baseline_net, test_loader_clean, device, flatten)
    if verbose:
        print('Your baseline accuracy on ' + dataset +
              ' (x_test_clean) = %.3f' % acc)
import torch
from torch import nn, optim
from torch.autograd import Variable

from datasets import get_mnist_dataset, get_cifar10_dataset, get_data_loader
from utils import *
from models import *

trainset, testset = get_mnist_dataset()
trainloader, testloader = get_data_loader(trainset, testset)
batch, labels = next(iter(trainloader))
plot_batch(batch)

batch_var = Variable(batch.cuda())
labels_var = Variable(one_hotify(labels).cuda())

base_model = BaselineCNN().cuda()
print(count_params(base_model))
base_loss = nn.CrossEntropyLoss()
base_optimizer = optim.Adam(base_model.parameters())
base_trainer = Trainer(base_model, base_optimizer, base_loss, trainloader,
                       testloader, use_cuda=True)
base_trainer.run(epochs=10)
base_trainer.save_checkpoint('weights/baseline_mnist.pth.tar')