import numpy as np
import torch
from itertools import product
from joblib import Parallel, delayed
from tqdm import tqdm

# Project-local helpers; these import paths are assumptions -- adjust them to this repo's layout.
from data import get_data
from models import ModelFactory


def bad_boy_vibes(tau=0, activation='tanh', cuda=False, num_epochs=50):
    """
    Trains a single-hidden-layer MLP with 2,000 hidden units on MNIST, sweeping over the
    weight-decay strengths [0, 1e-4, 1e-3, 1e-2, 1e-1] with no spectral regularizer.
    The remaining settings are fixed: batch_size=2500, lr=0.001, number of realizations=3.

    :param tau: integer, at what point in the eigenvalue spectrum should we regularize
    :param activation: string, what activation function to use ('tanh' or 'relu')
    :param cuda: boolean, flag indicating whether to use GPU or not
    :param num_epochs: integer, number of epochs to train each model for
    """
    realizations = 3
    batch_size = 2500  # 1.25 times the widest layer in the network
    lr = 1e-3
    train_loader, _, _ = get_data(dataset='MNIST', batch_size=batch_size, _seed=0,
                                  validate=False, data_dir='')
    weights = [0, 1e-4, 1e-3, 1e-2, 1e-1]
    for w in weights:
        kwargs = {"dims": [(28 * 28, 2000), (2000, 10)], "activation": activation,
                  "architecture": "mlp", "trainer": "vanilla", "regularizer": "no",
                  'alpha_jacob': 1e-4, 'bn': True, 'alpha_spectra': 1, 'optimizer': 'adam',
                  'lr': lr, 'cuda': cuda, 'eps': 0.3, 'only_last': True, 'gradSteps': 40,
                  'noRestarts': 1, 'lr_pgd': 1e-2, 'training_type': 'FGSM', 'slope': [1.00],
                  'eig_start': tau, 'weight_decay': w}
        models = [ModelFactory(**kwargs) for _ in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)
        model_params = [(kwargs, m.state_dict()) for m in models]
        if w == 0:
            torch.save(model_params,
                       f'experiment_1/vanilla_activation={activation}_epochs={num_epochs}')
        else:
            torch.save(model_params,
                       f'experiment_1/vanilla_activation={activation}_epochs={num_epochs}_w={w}')
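
# The checkpoints written throughout this file are lists of (kwargs, state_dict) tuples.
# Below is a minimal reload sketch under that assumption; the helper name load_models is
# ours, and it further assumes ModelFactory(**kwargs) returns an object exposing torch's
# load_state_dict (consistent with the state_dict() calls above).
def load_models(path):
    """Rebuild every saved realization from a checkpoint file produced by this script."""
    restored = []
    for cfg, sd in torch.load(path):
        model = ModelFactory(**cfg)
        model.load_state_dict(sd)
        restored.append(model)
    return restored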


def train_network(tau=10, activation='tanh', cuda=False, num_epochs=100, vanilla=False,
                  dataset='MNIST', arch='mlp', realizations=3, flat=False,
                  save_dir='experiment_2/'):
    """
    Training script for the experiments in section 4.2 of the paper.

    :param tau: integer, at what point in the eigenvalue spectrum should we regularize
    :param activation: string, what activation function to use (only choices are tanh and relu)
    :param cuda: boolean, flag indicating whether to use GPU or not
    :param num_epochs: integer, number of epochs to train model for
    :param vanilla: boolean, flag indicating whether to use the spectral regularizer or not
    :param dataset: string, which dataset to use (choices are MNIST or CIFAR10)
    :param arch: string, whether to use an MLP ('mlp') or a CNN ('cnn')
    :param realizations: integer, number of random seeds to use
    :param flat: boolean, if True train the 'flat' variant of the chosen architecture
    :param save_dir: string, location where to save models
    """
    lr = 1e-4
    if arch == 'mlp':
        if flat:
            arch = 'mlp_flat'
        dims = [(28 * 28, 1_000), (1_000, 1_000), (1_000, 1_000), (1_000, 10)]
        batch_size = 1500
        dataset = 'MNIST'
    elif arch == 'cnn':
        if flat:
            arch = 'cnn_flat'
        if dataset == 'MNIST':
            dims = [2, (1, 16), (16, 32), (800, 1000), (1000, 10)]
            batch_size = 6000
        else:
            dims = [2, (3, 16), (16, 32), (1152, 1000), (1000, 10)]
            batch_size = 6000
    else:
        raise ValueError(f"Unknown architecture: {arch}")

    train_loader, _, full_loader = get_data(dataset=dataset, batch_size=batch_size,
                                            _seed=0, validate=False, data_dir='data/')
    if vanilla:
        kwargs = {
            "dims": dims,
            "activation": activation,
            "architecture": arch,
            "trainer": "vanilla",
            "regularizer": "no",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': [1.00],
            'eig_start': tau
        }
        models = [ModelFactory(**kwargs) for _ in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)
        model_params = [(kwargs, m.state_dict()) for m in models]
        torch.save(model_params,
                   f'{save_dir}{dataset}/vanilla_arch={arch}_activation={activation}'
                   f'_epochs={num_epochs}')
    else:
        regularizers_strengths = [5., 2., 1.]
# In[] "Load in data loader" X_full, _ = next( iter(full_loader)) # load in full training set for eigenvectors # In[] kwargs = { "dims": dims, "activation": activation, "architecture": arch, "trainer": "vanilla", "regularizer": "eig", 'alpha_jacob': 1e-4, 'bn': False, 'alpha_spectra': 1.0, 'optimizer': 'adam', 'lr': lr, 'weight_decay': 0, 'cuda': cuda, 'eps': 0.3, 'only_last': True, 'gradSteps': 40, 'noRestarts': 1, 'lr_pgd': 1e-2, 'training_type': 'FGSM', 'slope': 1.00, 'eig_start': tau } counter = 0 for reg_strength in regularizers_strengths: kwargs['alpha_spectra'] = reg_strength models = [ModelFactory(**kwargs) for j in range(realizations)] print('no vibes') for j in range(realizations): for epoch in tqdm(range(num_epochs)): models[j].train_epoch(train_loader, X_full) model_params = [] for idx in range(len(models)): model_params.append((kwargs, models[idx].state_dict())) torch.save( model_params, save_dir + dataset + '/tau=' + str(tau) + '_arch=' + arch + '_activation=' + activation + '_epochs=' + str(num_epochs) + '_alpha=' + str(1) + '_beta=' + str(reg_strength)) counter += 1 print(str(len(regularizers_strengths) - counter) + " combos left")


def bad_boy(tau=10, activation='tanh', cuda=False, num_epochs=100, vanilla=False,
            dataset='CIFAR10', arch='cnn_flat', realizations=3):
    """
    Trains the 'flat' architecture variants for the section 4.2 experiments. Mirrors
    train_network, but passes 'demean': True to the model configuration and saves
    checkpoints under a 'flat_' prefix in experiment_2/.

    :param tau: integer, at what point in the eigenvalue spectrum should we regularize
    :param activation: string, what activation function to use (only choices are tanh and relu)
    :param cuda: boolean, flag indicating whether to use GPU or not
    :param num_epochs: integer, number of epochs to train model for
    :param vanilla: boolean, flag indicating whether to use the spectral regularizer or not
    :param dataset: string, which dataset to use (choices are MNIST or CIFAR10)
    :param arch: string, 'flat' for the MLP variant or 'cnn_flat' for the CNN variant
    :param realizations: integer, number of random seeds to use
    """
    lr = 1e-4
    if arch == 'flat':
        dims = [(28 * 28, 1_000), (1_000, 1_000), (1_000, 1_000), (1_000, 10)]
        batch_size = 1500
        dataset = 'MNIST'
    elif arch == 'cnn_flat':
        if dataset == 'MNIST':
            dims = [2, (1, 16), (16, 32), (800, 1000), (1000, 10)]
            batch_size = 6000
        else:
            dims = [2, (3, 16), (16, 32), (1152, 1000), (1000, 10)]
            batch_size = 6000
    else:
        raise ValueError(f"Unknown architecture: {arch}")

    train_loader, _, full_loader = get_data(dataset=dataset, batch_size=batch_size,
                                            _seed=0, validate=False, data_dir='data/')
    if vanilla:
        kwargs = {
            "dims": dims,
            "activation": activation,
            "architecture": arch,
            "trainer": "vanilla",
            "regularizer": "no",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': [1.00],
            'eig_start': tau,
            'demean': True
        }
        models = [ModelFactory(**kwargs) for _ in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)
        model_params = [(kwargs, m.state_dict()) for m in models]
        torch.save(model_params,
                   f'experiment_2/{dataset}/flat_vanilla_arch={arch}'
                   f'_activation={activation}_epochs={num_epochs}')
    else:
        regularizers_strengths = [5., 2., 1.]
        # Load the full training set once; the regularizer's eigenvectors are computed from it.
        X_full, _ = next(iter(full_loader))

        kwargs = {
            "dims": dims,
            "activation": activation,
            "architecture": arch,
            "trainer": "vanilla",
            "regularizer": "eig",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1.0,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': 1.00,
            'eig_start': tau,
            'demean': True
        }
        counter = 0
        for reg_strength in regularizers_strengths:
            kwargs['alpha_spectra'] = reg_strength
            models = [ModelFactory(**kwargs) for _ in range(realizations)]
            print(f'Training {realizations} realizations with beta={reg_strength}')
            for j in range(realizations):
                for epoch in tqdm(range(num_epochs)):
                    models[j].train_epoch(train_loader, X_full)
            model_params = [(kwargs, m.state_dict()) for m in models]
            torch.save(model_params,
                       f'experiment_2/{dataset}/flat_tau={tau}_arch={arch}'
                       f'_activation={activation}_epochs={num_epochs}'
                       f'_alpha=1_beta={reg_strength}')
            counter += 1
            print(f'{len(regularizers_strengths) - counter} combos left')


def bad_boy_vibes(tau=0, activation='tanh', cuda=False, num_epochs=50, parallel=False,
                  vanilla=False, lr=1e-3):
    """
    Runs a single-hidden-layer MLP with 2,000 hidden units on MNIST. The user can specify at
    what point in the eigenvalue spectrum the regularizer should start, aka tau. The other
    parameters of the experiment are fixed: batch_size=3500, number of realizations=3,
    regularization strengths=[1, 2, 5], slope values=[1.00].

    :param tau: integer, at what point in the eigenvalue spectrum should we regularize
    :param activation: string, what activation function to use ('tanh' or 'relu')
    :param cuda: boolean, flag indicating whether to use GPU or not
    :param num_epochs: integer, number of epochs to train model for
    :param parallel: boolean, if True train the realizations in parallel via joblib
    :param vanilla: boolean, flag indicating whether to use the spectral regularizer or not
    :param lr: float, learning rate to use
    """
    realizations = 3
    batch_size = 3500  # 1.75 times the widest layer in the network
    train_loader, _, full_loader = get_data(dataset='MNIST', batch_size=batch_size,
                                            _seed=0, validate=False, data_dir='')
    if vanilla:
        kwargs = {"dims": [(28 * 28, 2000), (2000, 10)], "activation": activation,
                  "architecture": "mlp", "trainer": "vanilla", "regularizer": "no",
                  'alpha_jacob': 1e-4, 'bn': False, 'alpha_spectra': 1, 'optimizer': 'adam',
                  'lr': lr, 'weight_decay': 0, 'cuda': cuda, 'eps': 0.3, 'only_last': True,
                  'gradSteps': 40, 'noRestarts': 1, 'lr_pgd': 1e-2, 'training_type': 'FGSM',
                  'slope': [1.00], 'eig_start': tau}
        models = [ModelFactory(**kwargs) for _ in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)
        model_params = [(kwargs, m.state_dict()) for m in models]
        torch.save(model_params,
                   f'experiment_1/vanilla_activation={activation}_epochs={num_epochs}')
    else:
        slopes = [1.00]
        regularizers_strengths = [1., 2., 5.]
        param_grid = product(slopes, regularizers_strengths)
        # Load the full training set once; the regularizer's eigenvectors are computed from it.
        X_full, _ = next(iter(full_loader))
        del full_loader  # no longer needed; free the memory before any parallel workers start

        kwargs = {"dims": [(28 * 28, 2000), (2000, 10)], "activation": activation,
                  "architecture": "mlp", "trainer": "vanilla", "regularizer": "eig",
                  'alpha_jacob': 1e-4, 'bn': False, 'alpha_spectra': 1, 'optimizer': 'adam',
                  'lr': lr, 'weight_decay': 0, 'cuda': cuda, 'eps': 0.3, 'only_last': True,
                  'gradSteps': 40, 'noRestarts': 1, 'lr_pgd': 1e-2, 'training_type': 'FGSM',
                  'slope': [1.00], 'eig_start': tau}
        counter = 0
        for (slope, reg_strength) in param_grid:
            kwargs['slope'] = slope
            kwargs['alpha_spectra'] = reg_strength
            models = [ModelFactory(**kwargs) for _ in range(realizations)]
            if not parallel:
                print('Training realizations serially')
                for j in range(realizations):
                    for epoch in tqdm(range(num_epochs)):
                        models[j].train_epoch(train_loader, X_full)
            else:
                print('Training realizations in parallel')
                models = Parallel(n_jobs=realizations)(
                    delayed(train)(models[j], batch_size, num_epochs, X_full)
                    for j in range(realizations))
            model_params = [(kwargs, m.state_dict()) for m in models]
            torch.save(model_params,
                       f'experiment_1/tau={tau}_activation={activation}'
                       f'_epochs={num_epochs}_alpha={slope}_beta={reg_strength}')
            counter += 1
            print(f'{len(slopes) * len(regularizers_strengths) - counter} combos left')


def train(model, batch_size, num_epochs, X_full):
    """Trains a single model realization; used as the joblib worker in bad_boy_vibes."""
    # Each worker builds its own loader with a fresh seed so realizations see
    # different batch orderings.
    train_loader, _, _ = get_data(dataset='MNIST', batch_size=batch_size,
                                  _seed=np.random.randint(100), validate=False, data_dir='')
    for epoch in tqdm(range(num_epochs)):
        model.train_epoch(train_loader, X_full)
    return model
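
# joblib pickles each argument when shipping it to a worker process, so the parallel path
# in bad_boy_vibes assumes the models and X_full are CPU-resident and picklable. A serial
# fallback with the same effect, should the parallel path misbehave:
#
#   models = [train(m, batch_size, num_epochs, X_full) for m in models]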


def train_network(tau=10, activation='tanh', cuda=False, num_epochs=50, vanilla=False,
                  lr=1e-3, save_dir='experiment_1/'):
    """
    Runs a single-hidden-layer MLP with 2,000 hidden units on MNIST. The user can specify at
    what point in the eigenvalue spectrum the regularizer should start, aka tau. The other
    parameters of the experiment are fixed: batch_size=3500, number of realizations=3,
    regularization strengths=[1, 2, 5], slope values=[1.00].

    :param tau: integer, at what point in the eigenvalue spectrum should we regularize
    :param activation: string, what activation function to use (only choices are tanh and relu)
    :param cuda: boolean, flag indicating whether to use GPU or not
    :param num_epochs: integer, number of epochs to train model for
    :param vanilla: boolean, flag indicating whether to use spectral regularizer or not
    :param lr: float, learning rate to use
    :param save_dir: string, location where to save models
    """
    realizations = 3
    batch_size = 3500  # 1.75 times the widest layer in the network
    train_loader, _, full_loader = get_data(dataset='MNIST', batch_size=batch_size,
                                            _seed=0, validate=False, data_dir='')
    if vanilla:
        kwargs = {
            "dims": [(28 * 28, 2000), (2000, 10)],
            "activation": activation,
            "architecture": "mlp",
            "trainer": "vanilla",
            "regularizer": "no",
            'alpha_jacob': 1e-4,
            'bn': False,
            'alpha_spectra': 1,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': [1.00],
            'eig_start': tau
        }
        models = [ModelFactory(**kwargs) for _ in range(realizations)]
        for j in range(realizations):
            for epoch in tqdm(range(num_epochs)):
                models[j].train_epoch(train_loader)
        model_params = [(kwargs, m.state_dict()) for m in models]
        torch.save(model_params,
                   f'{save_dir}vanilla_activation={activation}_epochs={num_epochs}')
    else:
        slopes = [1.00]
        regularizers_strengths = [1., 2., 5.]
        param_grid = product(slopes, regularizers_strengths)
        # Load the full training set once; the regularizer's eigenvectors are computed from it.
        X_full, _ = next(iter(full_loader))

        kwargs = {
            "dims": [(28 * 28, 2000), (2000, 10)],
            "activation": activation,
            "architecture": "mlp",
            "trainer": "vanilla",
            "regularizer": "eig",
            'alpha_jacob': 1e-4,
            'bn': False,  # Note that for results shown in section 4.1, we set bn=True
            'alpha_spectra': 1,
            'optimizer': 'adam',
            'lr': lr,
            'weight_decay': 0,
            'cuda': cuda,
            'eps': 0.3,
            'only_last': True,
            'gradSteps': 40,
            'noRestarts': 1,
            'lr_pgd': 1e-2,
            'training_type': 'FGSM',
            'slope': [1.00],
            'eig_start': tau
        }
        counter = 0
        for (slope, reg_strength) in param_grid:
            kwargs['slope'] = slope
            kwargs['alpha_spectra'] = reg_strength
            models = [ModelFactory(**kwargs) for _ in range(realizations)]
            for j in range(realizations):
                for epoch in tqdm(range(num_epochs)):
                    models[j].train_epoch(train_loader, X_full)
            model_params = [(kwargs, m.state_dict()) for m in models]
            torch.save(model_params,
                       f'{save_dir}tau={tau}_activation={activation}'
                       f'_epochs={num_epochs}_alpha={slope}_beta={reg_strength}')
            counter += 1
            print(f'{len(slopes) * len(regularizers_strengths) - counter} combos left')
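

# A minimal command-line entry point, sketched here for convenience; the flag names below
# are assumptions, not part of the original scripts.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Spectral regularization training scripts')
    parser.add_argument('--tau', type=int, default=10,
                        help='index in the eigenvalue spectrum where regularization starts')
    parser.add_argument('--activation', default='tanh', choices=['tanh', 'relu'])
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--vanilla', action='store_true',
                        help='train the unregularized baseline instead')
    args = parser.parse_args()

    train_network(tau=args.tau, activation=args.activation, cuda=args.cuda,
                  num_epochs=args.epochs, vanilla=args.vanilla)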