import numpy as np
import torch
import matplotlib.pyplot as plt

# Project-local helpers (get_dl, train, simple_train, get_accuracy, get_data,
# wrap, unwrap, tn, EPOCHS, the plot_* functions and the MNIST_1h* model
# classes) are assumed to be defined or imported elsewhere in this module.


def simple_benchmark(ds, replicas=10, epochs=100, starting_neurons=1000):
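    """Train MNIST_1h_sparsifier models over a sweep of sparsity penalties.

    Every penalty in {0, 1e-1, ..., 1e-7} is run `replicas` times, the
    resulting curves are averaged per penalty, and summary plots are saved.
    """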
    dl = get_dl(ds)
    dl2 = get_dl(ds, False)
    # Sparsity penalties 0, 1e-1, 1e-2, ..., 1e-7, each replicated
    # `replicas` times.
    base_lambdas = np.insert(np.power(10.0, -np.arange(1, 8)), 0, 0)
    lambdas = np.tile(base_lambdas, (replicas, 1)).reshape(-1)
    models = [MNIST_1h_sparsifier(starting_neurons).cuda() for _ in lambdas]
    result = train(models, dl, dl2, lamb=lambdas, epochs=epochs, l2_penalty=0)
    # Average the curves over the replica axis: each array becomes
    # (n_lambdas, epochs).
    result = [x.reshape(epochs, replicas, -1).mean(1).T for x in result]
    plot_training(base_lambdas, result[2], result[3], result[0], ds.__name__, epochs)
    plot_end(base_lambdas, result[2], result[3], result[0], ds.__name__, epochs)
    return result


def benchmark_dataset(ds, l2_penalty=0.001, suffix='', test_train=False):
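    """Benchmark flexible models on `ds` over a grid of initial sizes and
    penalty weights, then plot accuracies, convergence, and the
    penalty/accuracy frontier against a fixed 1000-neuron baseline.
    """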
    dl = get_dl(ds, True)
    dl2 = get_dl(ds, test_train)
    sizes = np.arange(0, 500, 25)
    models = [wrap(MNIST_1h_flexible(500, wrap, k)) for k in sizes]
    # lamb is passed by keyword so it cannot be mistaken for the optional
    # second dataloader in train()'s positional arguments.
    train(models, dl, lamb=1e-5, l2_penalty=l2_penalty)
    accuracies = np.array(get_accuracy(models, dl))
    plot_accuracies(accuracies, sizes, ds.__name__, suffix)
    plot_convergence(models, sizes, ds.__name__, suffix)
    powers = -np.arange(2.5, 8, 0.5)
    weights = 10 ** powers
    data = np.array([get_data((dl, dl2, x, l2_penalty)) for x in weights])
    best_model = wrap(MNIST_1h(1000))
    simple_train([best_model], dl, EPOCHS)
    plot_frontier(powers, data, get_accuracy([best_model], dl2)[0], ds.__name__, suffix)


def train_algo(model_gen, ds, lamb=1, size=50, f=10):
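    """Greedily build a network block by block.

    The sparsity penalty `lamb` is divided by `f` until a freshly trained
    block survives and beats the best score so far, then multiplied back
    up while retraining keeps improving that block. A block that fails to
    improve the global best score `gbs` ends the search. (`size` is
    currently unused.)
    """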
    models = []
    dl1 = get_dl(ds, True)
    dl2 = get_dl(ds, False)
    gbs = 0  # best score over all accepted blocks so far
    lamb *= f  # pre-compensate the division at the top of the loop

    def preout(x):
        # Sum the outputs of every previously accepted block; handed to
        # simple_train via pre_out so each new block is trained on top of
        # the combined output of the existing blocks.
        values = [m(x) for m in models]
        return sum(values[1:], values[0])

    while lamb > 1e-9:
        lamb /= f
        model = model_gen()
        pr = preout if len(models) > 0 else None
        bm, bs, sizes, losses, accs, taccs = simple_train(model,
                                                          dl1,
                                                          dl2,
                                                          lamb=lamb,
                                                          pre_out=pr)
        if sizes[-1] == 0 or bs < gbs:
            continue  # block died or did not improve: retry with a weaker penalty
        else:
            print('temp - best score', bs)
            while True:
                lamb *= f
                cm, cs, ss, ll, aa, taa = simple_train(bm,
                                                       dl1,
                                                       dl2,
                                                       lamb=lamb,
                                                       pre_out=pr)
                if cs < bs:
                    break  # score degraded, keep the previous best block
                else:
                    bm = cm
                    bs = cs
                    print('temp - best score', bs)
            print('block score', bs)
            if bs > gbs:
                models.append(bm)
                print('current size',
                      sum([tn(m.l0_loss().data) for m in models]))
                gbs = bs
            else:
                return models
    return models


def compare_convergence(ds):
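    """Compare how the deterministic and random flexible models converge
    on `ds`, averaged over 30 replicas of each.
    """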
    dl = get_dl(ds)
    replicas = 30
    r = range(replicas)
    simple_models = [MNIST_1h_flexible(500, wrap, 0).cuda() for _ in r]
    random_models = [MNIST_1h_flexible_random(500, wrap, 0).cuda() for _ in r]
    all_models = simple_models + random_models
    result = train(all_models, dl, lamb=0, epochs=EPOCHS * 4, l2_penalty=0)
    # Each result array is (epochs, 2 * replicas): average over the
    # replicas of each model family before plotting.
    sizes, gradients, losses = [x.reshape(-1, 2, replicas).mean(axis=2).T for x in result]
    labels = ['Deterministic Model', 'Random Model']
    plot_convergence_comparison(sizes, gradients, losses, ds.__name__, labels, 'deterministic_random_comparison')


def behavior_on_pretrained(ds):
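    """Measure how the flexible model grows neurons when its weights were
    pretrained (for the full or half budget) and the neuron count is then
    reset to zero, compared with a freshly initialized model.
    """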
    dl = get_dl(ds)
    replicas = 30
    r = range(replicas)
    pretraining_epochs = 10
    evaluation_epochs = EPOCHS * 4
    half_trained_models = [MNIST_1h_flexible(500, wrap, 500).cuda() for _ in r]
    fully_trained_models = [MNIST_1h_flexible(500, wrap, 500).cuda() for _ in r]
    fresh_models = [MNIST_1h_flexible(500, wrap, 0).cuda() for _ in r]
    train(fully_trained_models, dl, lamb=0, epochs=pretraining_epochs, l2_penalty=0)
    train(half_trained_models, dl, lamb=0, epochs=pretraining_epochs // 2, l2_penalty=0)
    for m in half_trained_models + fully_trained_models:
        m.x_0.data.zero_()  # reset every pretrained model back to zero active neurons
    all_models = fully_trained_models + half_trained_models + fresh_models
    result = train(all_models, dl, lamb=0, epochs=evaluation_epochs, l2_penalty=0)
    # Each result array is (epochs, 3 * replicas): average over the
    # replicas of each of the three groups.
    sizes, gradients, losses = [x.reshape(-1, 3, replicas).mean(axis=2).T for x in result]
    labels = ['Pretrained %s epochs' % pretraining_epochs,
              'Pretrained %s epochs' % (pretraining_epochs // 2),
              'Fresh Model']
    plot_convergence_comparison(sizes, gradients, losses, ds.__name__, labels, 'flexible_behavior_on_pretrained')


def validate_plateau_hypothesis(ds):
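    """Plot each neuron's outgoing absolute-weight mass against its smooth
    liveness indicator to show that the regularization causes the
    observed size plateau.
    """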
    dl = get_dl(ds, False)  # the test set is smaller, and the conclusion is unchanged
    model = wrap(MNIST_1h_flexible(500, wrap, 250))
    train([model], dl, lamb=0, l2_penalty=0.001)
    total_weights = unwrap(torch.abs(model.output_layer.weight).sum(0).data).numpy()
    scaler = unwrap(model.get_scaler().data).numpy()
    plt.figure(figsize=(10, 5))
    plt.title('Proof that the regularization is responsible for the size plateau')
    a = plt.gca()
    b = a.twinx()
    b.plot(total_weights, label='Sum of associated absolute output weights', color='C0')
    a.plot(scaler, label='Neuron liveness (smooth indicator function)', color='C1')
    a.legend(loc='upper right')
    b.legend(loc='lower right')
    a.set_xlabel('Neuron index')
    a.set_ylabel('Neuron liveness')
    b.set_ylabel('Sum of abs. weights')
    plt.tight_layout()
    plt.savefig('./plots/%s_1h_plateau_explanation.png' % (ds.__name__))
    plt.close()
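

if __name__ == '__main__':
    # Hypothetical usage sketch: `get_dl` is assumed to accept a torchvision
    # dataset class (the plotting code already relies on `ds.__name__`).
    # Adjust the dataset list to whatever this repo actually benchmarks.
    from torchvision.datasets import MNIST, FashionMNIST

    for ds in (MNIST, FashionMNIST):
        benchmark_dataset(ds)
        compare_convergence(ds)
        behavior_on_pretrained(ds)
        validate_plateau_hypothesis(ds)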