def simple_benchmark(ds, replicas=10, epochs=100, starting_neurons=1000):
    # Sweep the sparsity penalty over a log-spaced grid, averaging over replicas.
    dl = get_dl(ds)
    dl2 = get_dl(ds, False)
    # Penalty grid: 0 plus 1e-1 .. 1e-7.
    unique_lambdas = np.insert(np.power(10.0, -np.arange(1, 8)), 0, 0)
    # One model per (replica, lambda) pair.
    lambdas = np.tile(unique_lambdas, (replicas, 1)).reshape(-1)
    models = [MNIST_1h_sparsifier(starting_neurons).cuda() for _ in lambdas]
    result = train(models, dl, dl2, lamb=lambdas, epochs=epochs, l2_penalty=0)
    # Average the replicas back out: (epochs, replicas, lambdas) -> (lambdas, epochs).
    result = [x.reshape(epochs, replicas, -1).mean(1).T for x in result]
    plot_training(unique_lambdas, result[2], result[3], result[0], ds.__name__, epochs)
    plot_end(unique_lambdas, result[2], result[3], result[0], ds.__name__, epochs)
    return result
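
# Standalone sanity check (numpy only) of the lambda replication used above:
# np.tile lays the grid out replica-major, so the later
# reshape(epochs, replicas, -1).mean(1) in simple_benchmark averages exactly the
# replicas of each penalty. The values here are illustrative, not experiment data.
def _check_lambda_replication(replicas=3):
    grid = np.insert(np.power(10.0, -np.arange(1, 8)), 0, 0)  # [0, 1e-1, ..., 1e-7]
    per_model = np.tile(grid, (replicas, 1)).reshape(-1)      # one lambda per model
    assert per_model.shape == (replicas * len(grid),)
    assert (per_model[:len(grid)] == grid).all()              # first replica == grid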

def benchmark_dataset(ds, l2_penalty=0.001, suffix='', test_train=False):
    dl = get_dl(ds, True)
    dl2 = get_dl(ds, test_train)
    # Fixed-size baselines: one model per starting size in 0, 25, ..., 475.
    sizes = np.arange(0, 500, 25)
    models = [wrap(MNIST_1h_flexible(500, wrap, k)) for k in sizes]
    train(models, dl, 1e-5, l2_penalty=l2_penalty)
    accuracies = np.array(get_accuracy(models, dl))
    plot_accuracies(accuracies, sizes, ds.__name__, suffix)
    plot_convergence(models, sizes, ds.__name__, suffix)
    # Accuracy/size frontier over penalties 10**-2.5 .. 10**-7.5 (half-decade steps).
    powers = -np.arange(2.5, 8, 0.5)
    weights = 10.0 ** powers
    data = np.array([get_data((dl, dl2, w, l2_penalty)) for w in weights.tolist()])
    # Reference point: a large fixed model trained without sparsification.
    best_model = wrap(MNIST_1h(1000))
    simple_train([best_model], dl, EPOCHS)
    plot_frontier(powers, data, get_accuracy([best_model], dl2)[0], ds.__name__, suffix)
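
# Hedged usage sketch: `ds` is assumed to be whatever dataset constructor get_dl
# understands elsewhere in this repo (e.g. torchvision.datasets.MNIST); that
# assumption is not verified here.
#
#   from torchvision.datasets import MNIST
#   benchmark_dataset(MNIST, l2_penalty=1e-3, suffix='_mnist')
#
# The frontier sweep spans half-decade penalties from 10**-2.5 down to 10**-7.5:
def _frontier_powers():
    powers = -np.arange(2.5, 8, 0.5)
    assert len(powers) == 11 and powers[0] == -2.5 and powers[-1] == -7.5
    return 10.0 ** powers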

def train_algo(model_gen, ds, l=1, size=50, f=10):
    # Greedy block training: repeatedly fit a new block on top of the frozen
    # ensemble, searching for the largest penalty `l` that still improves the
    # global best score. (`size` is accepted but currently unused.)
    models = []
    dl1 = get_dl(ds, True)
    dl2 = get_dl(ds, False)
    gbs = 0  # global best score
    l *= f   # undone by the first division inside the loop
    def preout(x):
        # Sum of the outputs of the already-trained blocks.
        values = [m(x) for m in models]
        return sum(values[1:], values[0])
    while l > 1e-9:
        l /= f
        model = model_gen()
        pr = preout if len(models) > 0 else None
        bm, bs, sizes, losses, accs, taccs = simple_train(model, dl1, dl2, lamb=l, pre_out=pr)
        if sizes[-1] == 0 or bs < gbs:
            # Block collapsed to zero neurons or did not beat the ensemble:
            # retry with a weaker penalty.
            continue
        print('temp - best score', bs)
        # Anneal the penalty upward while the score keeps improving.
        while True:
            l *= f
            cm, cs, ss, ll, aa, taa = simple_train(bm, dl1, dl2, lamb=l, pre_out=pr)
            if cs < bs:
                break
            bm = cm
            bs = cs
            print('temp - best score', bs)
        print('block score', bs)
        if bs > gbs:
            models.append(bm)
            print('current size', sum(tn(m.l0_loss().data) for m in models))
            gbs = bs
        else:
            return models
    return models
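
# Toy trace (pure Python, no training) of the penalty schedule train_algo walks
# in its outer loop: lambda is divided by `f` until a block survives, then the
# inner loop multiplies it back up while the score improves.
def _penalty_schedule(l=1.0, f=10.0, floor=1e-9):
    tried = []
    l *= f
    while l > floor:
        l /= f
        tried.append(l)
    return tried  # [1.0, 0.1, 0.01, ..., down to ~1e-9]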

def compare_convergence(ds):
    # Compare the growth dynamics of the deterministic and the random variant
    # of the flexible model, averaged over 30 replicas each.
    dl = get_dl(ds)
    replicas = 30
    r = range(replicas)
    simple_models = [MNIST_1h_flexible(500, wrap, 0).cuda() for _ in r]
    random_models = [MNIST_1h_flexible_random(500, wrap, 0).cuda() for _ in r]
    all_models = simple_models + random_models
    result = train(all_models, dl, lamb=0, epochs=EPOCHS * 4, l2_penalty=0)
    # (epochs, 2 * replicas) -> (2, epochs): one curve per variant.
    sizes, gradients, losses = [x.reshape(-1, 2, replicas).mean(axis=2).T for x in result]
    labels = ['Deterministic Model', 'Random Model']
    plot_convergence_comparison(sizes, gradients, losses, ds.__name__, labels,
                                'deterministic_random_comparison')
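
# Standalone check of the replica averaging above: with models ordered
# [replicas of variant A, replicas of variant B], each per-epoch row reshapes
# to (2, replicas), so the axis-2 mean collapses replicas within each variant.
def _check_variant_averaging(replicas=30, epochs=4):
    fake = np.arange(epochs * 2 * replicas, dtype=float).reshape(epochs, 2 * replicas)
    grouped = fake.reshape(-1, 2, replicas).mean(axis=2).T
    assert grouped.shape == (2, epochs)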

def behavior_on_pretrained(ds):
    # Does the growth mechanism behave differently on already-trained weights?
    # Pretrain two batches of full-size models, reset their neuron counters to
    # zero, and let them regrow next to fresh models.
    dl = get_dl(ds)
    replicas = 30
    r = range(replicas)
    pretraining_epochs = 10
    evaluation_epochs = EPOCHS * 4
    half_trained_models = [MNIST_1h_flexible(500, wrap, 500).cuda() for _ in r]
    fully_trained_models = [MNIST_1h_flexible(500, wrap, 500).cuda() for _ in r]
    fresh_models = [MNIST_1h_flexible(500, wrap, 0).cuda() for _ in r]
    train(fully_trained_models, dl, lamb=0, epochs=pretraining_epochs, l2_penalty=0)
    train(half_trained_models, dl, lamb=0, epochs=pretraining_epochs // 2, l2_penalty=0)
    for m in half_trained_models + fully_trained_models:
        m.x_0.data.zero_()  # We reset everyone to zero neurons
    all_models = fully_trained_models + half_trained_models + fresh_models
    result = train(all_models, dl, lamb=0, epochs=evaluation_epochs, l2_penalty=0)
    # (epochs, 3 * replicas) -> (3, epochs): one curve per pretraining regime.
    sizes, gradients, losses = [x.reshape(-1, 3, replicas).mean(axis=2).T for x in result]
    labels = ['Pretrained %s epochs' % pretraining_epochs,
              'Pretrained %s epochs' % (pretraining_epochs // 2),
              'Fresh Model']
    plot_convergence_comparison(sizes, gradients, losses, ds.__name__, labels,
                                'flexible_behavior_on_pretrained')
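
# Miniature of the reset trick used above, under the assumption (from its use
# here) that MNIST_1h_flexible stores its open-neuron count in the scalar
# parameter x_0: zeroing x_0.data restarts growth from zero neurons while
# leaving the trained weights untouched.
def _reset_demo():
    x_0 = torch.nn.Parameter(torch.tensor(500.0))
    x_0.data.zero_()
    assert x_0.item() == 0.0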

def validate_plateau_hypothesis(ds):
    dl = get_dl(ds, False)  # Use the test set because it is smaller; the effect is the same
    model = wrap(MNIST_1h_flexible(500, wrap, 250))
    train([model], dl, 0, l2_penalty=0.001)
    # Per hidden neuron: total outgoing weight mass vs. the smoothed "alive" indicator.
    total_weights = unwrap(torch.abs(model.output_layer.weight).sum(0).data).numpy()
    scaler = unwrap(model.get_scaler().data).numpy()
    plt.figure(figsize=(10, 5))
    plt.title('Proof that the regularization is responsible for the size plateau')
    a = plt.gca()
    b = a.twinx()
    b.plot(total_weights, label='Sum of associated absolute weights', color='C0')
    a.plot(scaler, label='Neuron used (smoothed indicator function)', color='C1')
    a.legend(loc='upper right')
    b.legend(loc='lower right')
    a.set_xlabel('neuron')  # twinx() makes `b` current, so label `a` explicitly
    a.set_ylabel('Neuron liveness')
    b.set_ylabel('Sum of abs. weights')
    plt.tight_layout()
    plt.savefig('./plots/%s_1h_plateau_explanation.png' % ds.__name__)
    plt.close()
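
# Standalone check of the weight statistic plotted above: for a
# torch.nn.Linear(hidden, out) layer, `weight` has shape (out, hidden), so
# abs().sum(0) yields one value per hidden neuron, aligned with the per-neuron
# scaler on the shared x-axis.
def _check_weight_statistic(hidden=500, out=10):
    layer = torch.nn.Linear(hidden, out)
    per_neuron = layer.weight.abs().sum(0)
    assert per_neuron.shape == (hidden,)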