def run_trial(f_dfs):
    X = genX(n)
    Y = genY(X)
    batch_fn = make_flat_batch_fn(X, Y, n_per_batch)

    Xvalid = genX(1000)
    Yvalid = genY(Xvalid)

    # Yrms = rms(Y, axis=1).mean()
    # Yvalidrms = rms(Yvalid, axis=1).mean()

    # --- initial weights
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)
    # print(", ".join("||W%d|| = %0.3f" % (i, norm(w)) for i, w in enumerate(weights)))

    # fixed random feedback matrices for direct feedback alignment (DFA);
    # scale=0.2 was also tried
    genB = lambda shape: initial_w(
        shape, rng=rng, kind='ortho', normkind='rightmean', scale=0.3)
    directBs = [genB((dout, dhid)) for dhid in dhids]

    def test_derivative(f, df):

        get_network = lambda **kwargs: Network(
            weights, f=f, df=df, biases=None, noise=0, **kwargs)

        bp_learner = BPLearner(get_network(),
                               squared_cost,
                               rms_error,
                               eta=eta,
                               alpha=alpha,
                               name='BP')
        bp_learner.weight_norms = []

        fas_learner = FASkipLearner(get_network(),
                                    squared_cost,
                                    rms_error,
                                    eta=eta,
                                    alpha=alpha,
                                    name='DFA')
        fas_learner.Bs = directBs

        learners = [bp_learner, fas_learner]
        for learner in learners:
            learner.train(1, batch_fn)

        for learner in learners:
            print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                            for i, w in enumerate(learner.network.weights)))

        return learners

    results = []
    for f_df in f_dfs:
        results.append(test_derivative(*f_df))

    return results
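make_flat_batch_fn is used in every example on this page but never shown. A minimal sketch, assuming it returns a zero-argument callable that yields successive (X, Y) minibatches for one pass over the data:

def make_flat_batch_fn(X, Y, n_per_batch):
    # assumed behavior: one call to batch_fn() iterates the whole dataset
    # once, in contiguous minibatches of size n_per_batch
    def batch_fn():
        for i in range(0, len(X), n_per_batch):
            yield X[i:i + n_per_batch], Y[i:i + n_per_batch]
    return batch_fn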
Example #2
    pointers = UniformHypersphere(surface=True).sample(n_labels,
                                                       d=dout,
                                                       rng=rng)
    dtp_cheat = False

    def cost(y, yinds, pointers=pointers):
        return pointer_squared_cost_on_inds(y, yinds, pointers)

    def error(y, yinds, pointers=pointers):
        return pointer_class_error_on_inds(y, yinds, pointers)
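pointer_squared_cost_on_inds and pointer_class_error_on_inds are not defined in this excerpt. A hypothetical sketch, assuming each label is encoded as a unit "pointer" vector on the hypersphere and classification picks the pointer with the largest projection:

import numpy as np

def pointer_squared_cost_on_inds(y, yinds, pointers):
    # squared distance between the output and the pointer for each label index
    return 0.5 * ((y - pointers[yinds])**2).sum(axis=1)

def pointer_class_error_on_inds(y, yinds, pointers):
    # classify by largest projection onto a pointer; return 0/1 mismatches
    return np.argmax(np.dot(y, pointers.T), axis=1) != yinds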


din = trainX.shape[1]
sizes = [din] + dhids + [dout]

batch_fn = make_flat_batch_fn(trainX, trainY, n_per_batch)

# weights = initial_weights(sizes, kind='uniform', scale=0.001, rng=rng)
weights = initial_weights(sizes, kind='ortho', scale=1, rng=rng)

f, df = static_f_df('liflinear', tau_rc=0.05, amplitude=0.024)
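static_f_df comes from the hunse_thesis codebase and is not reproduced here. For reference, the standard steady-state LIF rate that a 'liflinear' nonlinearity is presumably built on looks roughly like this; tau_ref is an assumed value, and the actual df may be a linearized surrogate derivative rather than the true one:

import numpy as np

def lif_rate(x, tau_rc=0.05, tau_ref=0.002, amplitude=0.024):
    # steady-state LIF firing rate; zero below the firing threshold (here 1)
    j = np.maximum(np.asarray(x, dtype=float) - 1.0, 0.0)
    out = np.zeros_like(j)
    m = j > 0
    out[m] = amplitude / (tau_ref + tau_rc * np.log1p(1.0 / j[m]))
    return out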

# learning rate; also tried: 1e-1, 5e-2, 2e-2, 1e-2, 1e-3, 5e-4, 2e-4, 1e-4
eta = 4e-3
Example #3
alpha = 0  # also tried: 1e-8, 1e-6

Learner = BPLearner  # ShallowLearner is the other option here

# --- problem dataset: linear regression targets Y = X T,
# with T a random orthogonalized map
T = orthogonalize(rng.normal(size=(din, dout)))
genX = lambda n: rng.normal(scale=1., size=(n, din))
genY = lambda X: np.dot(X, T)

X = genX(n)
Y = genY(X)
batch_fn = make_flat_batch_fn(X, Y, n_per_batch)

Xvalid = genX(10000)
Yvalid = genY(Xvalid)
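orthogonalize is assumed to return a matrix with orthonormal columns, so the targets are an isometric linear projection of the inputs. A minimal QR-based sketch:

import numpy as np

def orthogonalize(A):
    # assumed helper: replace A's columns with an orthonormal basis via QR
    Q, _ = np.linalg.qr(A)
    return Q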


def arg_string(args):
    def fmt(k, v):
        if isinstance(v, dict):
            return "%s: %s" % (k, arg_string(v))
        if is_number(v):
            return "%s: %0.1e" % (k, v)
        return "%s: %s" % (k, v)

    return "{%s}" % ", ".join(fmt(k, v) for k, v in args.items())
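For example, assuming is_number accepts ints and floats, arg_string renders hyperparameter dicts (including nested ones) compactly:

# arg_string({'eta': 4e-3, 'w_kind': 'ortho', 'sub': {'alpha': 0}})
# -> "{eta: 4.0e-03, w_kind: ortho, sub: {alpha: 0.0e+00}}"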


def objective(args):
    sargs = arg_string(args)
    w_kind = args['w_kind']
    w_scale = args['w_scale']
Example #4
    eta=0.5 * eta,
    momentum=momentum,
    name='FA')
# scale=0.2 and scale=0.15 also tried for the feedback weights
genB = lambda shape: initial_w(
    shape, kind='ortho', normkind='rightmean', scale=0.1, rng=rng)
fas_learner.Bs = [genB((dout, dhid)) for dhid in dhids]

# any single learner can be run alone, e.g. learners = [fas_learner]
learners = [bp_learner, bpl_learner, fas_learner]

batch_size = 10
batch_fn = make_flat_batch_fn(Xtrain, Ytrain, batch_size)
for learner in learners:
    print("%s: %s" % (learner.name, weight_norm_s(learner.network.weights)))
    if hasattr(learner, 'Bs'):
        print("%s B: %s" % (learner.name, weight_norm_s(learner.Bs)))

    # 5, 10, 15, 25, and 50 epochs also tried
    learner.train(epochs, batch_fn, test_set=(Xtest, Ytest))

    # print(learner.test((Xtest, Ytest)).mean())

    # print(np.round(learner.network.weights[0].T, 3))
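weight_norm_s (used in the print loop above) is not shown on this page; a sketch consistent with the norm-printing loop in Example #1:

import numpy as np

def weight_norm_s(weights):
    # assumed helper: format the norm of each weight matrix as a string
    return ", ".join("||W%d|| = %0.3f" % (i, np.linalg.norm(w))
                     for i, w in enumerate(weights))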
Example #5
    solver2 = hunse_thesis.solvers.SoftmaxSGD(n_epochs=2,
                                              reg=reg,
                                              eta=eta,
                                              momentum=momentum,
                                              batch_size=batch_size,
                                              verbose=1)
    # solver = hunse_thesis.solvers.SoftmaxSGD(
    #     reg=0.001, n_epochs=1, eta=1e-4, momentum=0.9, batch_size=100, verbose=1)
    # W1 = solver(h1[:100], trainT[:100], X=weights[-1])
    W1, _ = solver2(h1, trainT, X=weights[-1])
    train_out2 = np.dot(h1, W1)
    train_cost2 = cost(train_out2, trainY)[0].mean()
    print(train_cost2)
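cost evidently returns a tuple whose first element holds per-example costs (see cost(...)[0].mean() above). A sketch with that contract, also matching the squared_cost passed to the learners in Example #1; treating the second element as the output-layer error gradient is an assumption:

import numpy as np

def squared_cost(y, t):
    # per-example squared costs, plus d(cost)/dy for backpropagation
    e = y - t
    return 0.5 * (e**2).sum(axis=1), e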

    batch_fn = make_flat_batch_fn(trainX, trainY, batch_size)

    learner = ShallowLearner(get_network(),
                             cost,
                             error,
                             eta=eta,
                             alpha=alpha,
                             momentum=momentum)

    learner.train(2, batch_fn, test_set=test)
    train_out3 = learner.network.predict(trainX)
    train_cost3 = cost(train_out3, trainY)[0].mean()
    train_error3 = error(train_out3, trainY).mean()
    print((train_cost3, train_error3))

    assert 0  # deliberate stop: halt the script here while debugging