def train_1d_regression():
    """Train a small mixture-prior Bayesian MLP on the 1-D toy regression data."""
    dim_h = 20
    n_layers = 2
    activation = "rbf"

    log_dir = (
        project_dir /
        f"runs/individual/{datetime.now().strftime('%Y%m%d_%H%M%S')}-act_{activation}"
        f"-dim_h_{dim_h}")

    x_train, y_train, x_val, y_val, x_all, y_all = get_toy_data(num_samples=70,
                                                                sigma=0.1)
    dataloader_train = DataLoader(TensorDataset(x_train, y_train),
                                  batch_size=70,
                                  shuffle=True)

    model = BayesianMLP(
        dim_in=1,
        dim_out=1,
        dim_h=dim_h,
        n_layers=n_layers,
        prior_type="mixture",
        # prior_sigma=prior_sigma,
        prior_pi=0.5,
        prior_sigma_1=9.0,
        prior_sigma_2=0.01,
        posterior_rho_init=-3.0,
        activation=activation,
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    return train(
        model,
        optimizer,
        dataloader_train,
        n_epochs=250,
        log_dir=log_dir,
        evaluate_func=eval_1d_regression,
        evaluate_data=(x_train, y_train, x_val, y_val, x_all, y_all, 20),
        model_noise_var=1.0,
        M=70,
    )
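# Minimal driver for the function above; train() presumably returns the
# training history or final metric (an assumption, not from the original):
if __name__ == "__main__":
    train_1d_regression()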
Example #2
def poisson_samples(x_values, Tsize, lambda_max, dt):
    """Draw Tsize frames of Poisson spikes (one frame per time step) for each sample."""
    n_samples = x_values.shape[0]
    input_size = x_values.shape[1]

    out = np.zeros((n_samples, Tsize, input_size))

    for ti in range(Tsize):
        out[:, ti, :] = poisson(lambda_max * x_values, dt)

    return out
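
# `poisson` itself is not defined in this snippet. A hypothetical stand-in for
# the interface the loop above assumes (a Bernoulli approximation of a Poisson
# process: a unit fires in a bin of width dt with probability rate * dt):
def poisson(rate, dt):
    return (np.random.random(np.shape(rate)) < rate * dt).astype(np.float64)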


#############################################

x_values, y_values = get_toy_data()
y_uniq = np.unique(y_values)
y_hot = one_hot(y_values, len(y_uniq))

n_train = 4 * x_values.shape[0] // 5
n_test = 1 * x_values.shape[0] // 5

batch_size = 400

n_train_batches = n_train // batch_size
n_test_batches = n_test // batch_size

T = 50.0
dt = 1.0
num_steps = int(T / dt)
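
# Illustrative use of poisson_samples above; the rate is a placeholder
# (200 Hz written in 1/ms, echoing Example #4's lambda_max):
spike_trains = poisson_samples(x_values, num_steps, 200.0 / 1000.0, dt)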
Example #3
def objective(trial: optuna.Trial):
    """Optuna objective: sample mixture-prior hyperparameters, train, and return train()'s value."""
    dim_h = 512
    n_layers = 1
    activation = "rbf"
    sigma_model = 0.1
    num_samples = 70
    # sigma_prior = trial.suggest_float("prior_sigma", low=1e-3, high=10.0, log=True)
    # M = trial.suggest_int("M", low=1, high=200, log=True)
    model_noise_var = 1.0
    prior_sigma_1 = trial.suggest_float("prior_sigma_1",
                                        low=1.0,
                                        high=20.0,
                                        log=True)
    # prior_sigma_2 = 1.0
    prior_sigma_2 = trial.suggest_float("prior_sigma_2",
                                        low=1e-3,
                                        high=1.,
                                        log=True)
    # prior_pi = 1.0
    prior_pi = trial.suggest_float("prior_pi", low=0.0, high=1.0)
    posterior_rho_init = trial.suggest_float("posterior_rho_init",
                                             low=-5.0,
                                             high=-1.0)
    # posterior_rho_init = -2.

    log_dir = (
        project_dir /
        f"runs/{trial.study.study_name}/trial_{trial.number}-dim_h_{dim_h}-n_layers_{n_layers}-act_{activation}-"
        # f"sigma_{sigma_prior:.2f}"
        f"s1_{prior_sigma_1:.2f}-s2_{prior_sigma_2:.2f}-pi_{prior_pi:.2f}-scale_{posterior_rho_init}"
    )

    x_train, y_train, x_val, y_val, x_all, y_all = get_toy_data(
        num_samples=num_samples, sigma=sigma_model)
    dataloader_train = DataLoader(TensorDataset(x_train, y_train),
                                  batch_size=num_samples,
                                  shuffle=True)

    model = BayesianMLP(
        dim_in=1,
        dim_out=1,
        dim_h=dim_h,
        n_layers=n_layers,
        prior_type="mixture",
        # prior_sigma=prior_sigma,
        prior_pi=prior_pi,
        prior_sigma_1=prior_sigma_1,
        prior_sigma_2=prior_sigma_2,
        posterior_rho_init=posterior_rho_init,
        activation=activation,
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    return train(
        model,
        optimizer,
        dataloader_train,
        n_epochs=150,
        log_dir=log_dir,
        evaluate_func=eval_1d_regression,
        evaluate_data=(x_train, y_train, x_val, y_val, x_all, y_all, 50),
        model_noise_var=model_noise_var,
        M=1,
    )
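
# A minimal sketch of driving the objective above; the optimization direction
# and trial count are illustrative assumptions, not from the original:
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)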
Example #4
Tsize = int(T / dt + dt)

lambda_max = 200.0 / 1000.0  # 1/ms

P0 = 20.0 / lambda_max
P1 = 20.0 / (lambda_max * lambda_max)
# alt
P0 = P1
P1 = P1 / 10.0

learning_rate = 3 * 0.21

act = SigmoidActivation()

x_values, y_values = get_toy_data(seed=2)
n_train = 4 * x_values.shape[0] // 5
n_valid = 1 * x_values.shape[0] // 5

y_uniq = np.unique(y_values)
y_hot = one_hot(y_values, len(y_uniq))

input_size = x_values.shape[1]
input_len = x_values.shape[0]
hidden_size = 300
output_size = len(y_uniq)

batch_size = 500
n_train_batches = n_train // batch_size
n_valid_batches = n_valid // batch_size
batch_to_listen = 1
Example #5
def run(W, p_init):
    """Run one free (negative) and one clamped (positive) phase on a single batch;
    return V_neg, V_pos and the contrastive weight/bias updates."""
    tmp_dir = "/home/alexeyche/bm"
    # [ os.remove(pj(tmp_dir, f)) for f in os.listdir(tmp_dir) if f[-4:] == ".png" ]
    #
    np.random.seed(5)

    input_size = 2
    hidden_size = 100
    output_size = 2
    batch_size = 50
    epsilon = 1.0
    beta = 1.0
    n_neg = 20
    n_pos = 4

    net_size = input_size + hidden_size + output_size
    sl0, sl1, sl2 = input_size, input_size + hidden_size, input_size + hidden_size + output_size

    # W = (W + W.T)/2.0

    bias = np.zeros((net_size, ))

    def make_lrate_matrices(lrate01, lrate12):
        w_lrates = np.zeros(W.shape)
        w_lrates[:sl0, sl0:sl1] = lrate01
        w_lrates[sl0:sl1, :sl0] = lrate01
        w_lrates[sl0:sl1, sl1:sl2] = lrate12
        w_lrates[sl1:sl2, sl0:sl1] = lrate12

        b_lrates = np.zeros(bias.shape)
        b_lrates[sl0:sl1] = lrate01
        b_lrates[sl1:sl2] = lrate12

        return w_lrates, b_lrates

    def save_params(W, bias):
        import pickle

        biases_values = [bias[:sl0], bias[sl0:sl1], bias[sl1:sl2]]
        weights_values = [W[:sl0, sl0:sl1], W[sl0:sl1, sl1:sl2]]
        to_dump = (biases_values, weights_values, {
            "hidden_sizes": [hidden_size],
            "batch_size": batch_size
        }, {
            "training error": [],
            "validation error": []
        })
        with open(
                "/home/alexeyche/distr/Towards-a-Biologically-Plausible-Backprop/net1.save",
                'wb') as f:
            pickle.dump(to_dump, f, protocol=pickle.HIGHEST_PROTOCOL)

    # save_params(W, bias)

    w_lrates, b_lrates = make_lrate_matrices(0.05, 0.01)

    act = ClipActivation()
    model = ExpDecayHopfield(act, W, bias)
    cost = MseCost()

    x_values, y_values = get_toy_data(seed=2)
    # shs(x_values, labels=y_values)
    n_batches = x_values.shape[0] // batch_size

    u = p_init  #np.zeros((batch_size, net_size))

    index = 0

    x_v = get_batch(x_values, index, batch_size=batch_size)
    y_idx = get_batch(y_values, index, batch_size=batch_size)
    y_v = one_hot(y_idx, 2)

    x_t, y_t = x_v, y_v
    output_size = y_t.shape[-1]
    input_size = x_t.shape[-1]

    x = u.copy()
    print "W01: ", np.mean(W[:sl0, sl0:sl1])
    print "W12: ", np.mean(W[sl0:sl1, sl1:sl2])
    dx_acc, x_acc = [], []

    x[:, :input_size] = x_t

    dx = model.dynamics(x)
    dx = -dx

    dx_acc.append(dx)
    x_acc.append(x)

    u, cost_val_neg, V_neg = model.run(u,
                                       n_neg,
                                       x_v,
                                       y_v,
                                       cost,
                                       epsilon=epsilon,
                                       clamped=False)

    u_pos, cost_val_pos, V_pos = model.run(u,
                                           n_pos,
                                           x_v,
                                           y_v,
                                           cost,
                                           epsilon=epsilon,
                                           clamped=True,
                                           beta=beta)

    u[:, :input_size] = x_t
    u_pos[:, :input_size] = x_t

    dWn, dbn = model.grad(u)
    dWp, dbp = model.grad(u_pos)

    dW = 2.0 * (dWp - dWn) / beta / batch_size
    db = (dbp - dbn) / beta / batch_size

    return V_neg, V_pos, dW, db
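
# Illustrative call of run() above; W is built the way Example #9 builds it
# (initialize_layer + symmetrization) and p_init follows the in-code hint:
from models import initialize_layer  # as imported in Example #8

net_size_demo = 2 + 100 + 2  # input + hidden + output sizes hard-coded in run()
W_demo = initialize_layer(net_size_demo, net_size_demo)
W_demo = (W_demo + W_demo.T) / 2.0
p_init_demo = np.zeros((50, net_size_demo))  # batch_size x net_size, per the commented hint
V_neg, V_pos, dW, db = run(W_demo, p_init_demo)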
Example #7
import numpy as np

import xgboost as xgb

import sys
from datasets import get_toy_data

x_values, y_values = get_toy_data()

n_train = 4 * x_values.shape[0] // 5
n_valid = 1 * x_values.shape[0] // 5
        
x_train = x_values[:n_train]
x_test = x_values[n_train:(n_train+n_valid)]

y_train = y_values[:n_train]
y_test = y_values[n_train:(n_train+n_valid)]

xg_train = xgb.DMatrix(x_train, label=y_train)
xg_test = xgb.DMatrix(x_test, label=y_test)

param = {}
# use softmax multi-class classification
param['objective'] = 'multi:softmax'
# param['objective'] = 'binary:logistic'
# scale weight of positive examples
param['eta'] = 0.1
param['max_depth'] = 6
param['silent'] = True
param['nthread'] = 4
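
# The listing stops after filling `param`; a minimal sketch of how these
# settings are typically consumed (num_class and the round count are
# assumptions, not from the original):
param['num_class'] = len(np.unique(y_values))  # required by multi:softmax
watchlist = [(xg_train, 'train'), (xg_test, 'test')]
bst = xgb.train(param, xg_train, num_boost_round=50, evals=watchlist)
pred = bst.predict(xg_test)
print('test error:', np.mean(pred != y_test))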
Example #8
from models import Hopfield, ExpDecayHopfield
from activation import *
from cost import MseCost
from models import initialize_layer
from opt import *


def get_batch(d, idx, batch_size):
    # Slice mini-batch `idx` out of array `d`.
    return d[idx * batch_size:(idx + 1) * batch_size]


def one_hot(y, y_size):
    # Turn integer class labels into a one-hot matrix of width y_size.
    y_oh = np.zeros((y.shape[0], y_size))
    y_oh[np.arange(y.shape[0]), y] = 1.0
    return y_oh

x_values, y_values = get_toy_data(seed=2)


tmp_dir = "/home/alexeyche/bm"
# [ os.remove(pj(tmp_dir, f)) for f in os.listdir(tmp_dir) if f[-4:] == ".png" ]
#
np.random.seed(8)

input_size = x_values.shape[1]
hidden_size = 100
output_size = len(np.unique(y_values))
batch_size = 100
epsilon = 1.0
beta = 1.0
n_neg = 20
n_pos = 4
Example #9
def run_then_return_val_loss(num_iters, hyperparameters):
    """Train for num_iters epochs with the given hyperparameters and return the
    validation loss (a Hyperband-style evaluation interface)."""
    epsilon = hyperparameters["epsilon"]
    hidden_size = hyperparameters["hidden_size"]
    lrate0 = hyperparameters["lrate0"]
    lrate1 = hyperparameters["lrate1"]
    n_neg = hyperparameters["n_neg"]
    n_pos = hyperparameters["n_pos"]
    beta = hyperparameters["beta"]

    x_values, y_values = get_toy_data(seed=2)

    input_size = x_values.shape[1]
    output_size = len(np.unique(y_values))
    batch_size = 100
    net_size = input_size + hidden_size + output_size
    sl0, sl1, sl2 = input_size, input_size + hidden_size, input_size + hidden_size + output_size

    def make_feed_forward(w):
        w[:sl0, :sl0] = 0.0
        w[sl0:sl1, sl0:sl1] = 0.0
        w[sl1:sl2, sl1:sl2] = 0.0
        w[:sl0, sl1:sl2] = 0.0
        w[sl1:sl2, :sl0] = 0.0
        return w

    def make_lrate_matrices(lrate01, lrate12):
        w_lrates = np.zeros(W.shape)
        w_lrates[:sl0, sl0:sl1] = lrate01
        w_lrates[sl0:sl1, :sl0] = lrate01
        w_lrates[sl0:sl1, sl1:sl2] = lrate12
        w_lrates[sl1:sl2, sl0:sl1] = lrate12

        b_lrates = np.zeros(bias.shape)
        # b_lrates[:sl0] = lrate01
        b_lrates[sl0:sl1] = lrate01
        b_lrates[sl1:sl2] = lrate12

        return w_lrates, b_lrates

    W = initialize_layer(net_size, net_size)
    W = (W + W.T) / 2.0
    W = make_feed_forward(W)

    bias = np.zeros((net_size, ))

    act = ClipActivation()
    model = ExpDecayHopfield(act, W, bias)
    cost = MseCost()
    n_train = (4 * x_values.shape[0] // 5) // batch_size
    n_valid = (1 * x_values.shape[0] // 5) // batch_size

    n_batches = x_values.shape[0] // batch_size

    u_p = np.zeros((batch_size, net_size))
    u_p_v = np.zeros((batch_size, net_size))

    w_lrates, b_lrates = make_lrate_matrices(lrate0, lrate1)

    opt = SGDOpt([w_lrates, b_lrates])

    opt.init(model.W, model.b)

    for e in range(num_iters):
        cost_val_stat_t, dW_stat_t, db_stat_t, acc_stat_t = 0.0, 0.0, 0.0, 0.0

        mom_acc, db_acc = np.zeros(model.W.shape), np.zeros(model.b.shape)

        # for index in np.random.permutation(range(n_train)):
        for index in range(n_train):
            # u = u_p[index * batch_size: (index + 1) * batch_size]
            u = u_p.copy()

            x_v = get_batch(x_values, index, batch_size=batch_size)
            y_idx = get_batch(y_values, index, batch_size=batch_size)
            y_v = one_hot(y_idx, output_size)

            u, cost_val_neg, V_neg = model.run(u,
                                               n_neg,
                                               x_v,
                                               y_v,
                                               cost,
                                               epsilon=epsilon,
                                               clamped=False)

            u_pos, _, V_pos = model.run(u,
                                        n_pos,
                                        x_v,
                                        y_v,
                                        cost,
                                        epsilon=epsilon,
                                        clamped=True,
                                        beta=beta)

            u[:, :input_size] = x_v
            u_pos[:, :input_size] = x_v

            dWn, dbn = model.grad(u)
            dWp, dbp = model.grad(u_pos)

            dW = 2.0 * (dWp - dWn) / beta / batch_size
            db = (dbp - dbn) / beta / batch_size

            model.W, model.b = opt.update((model.W, dW), (model.b, db))

            cost_val_stat_t += np.mean(cost_val_neg)
            dW_stat_t += np.mean(dW)
            db_stat_t += np.mean(db)
            acc_stat_t += np.mean(np.argmax(u[:, -output_size:], 1) != y_idx)

            # u_p[index * batch_size: (index + 1) * batch_size] = u
            u_p = u
            # shm(u[:,input_size:], u_pos[:,input_size:], file=pj(tmp_dir, "{}_{}_u.png".format(e, index)))

        cost_val_stat_v, acc_stat_v = 0.0, 0.0

        for index in range(n_valid):
            dindex = n_train + index
            u = u_p_v.copy()  # u_p[dindex * batch_size: (dindex + 1) * batch_size]

            x_v = get_batch(x_values, dindex, batch_size=batch_size)
            y_idx = get_batch(y_values, dindex, batch_size=batch_size)
            y_v = one_hot(y_idx, output_size)

            u, cost_val_neg, V_neg = model.run(u,
                                               n_neg,
                                               x_v,
                                               y_v,
                                               cost,
                                               epsilon=epsilon,
                                               clamped=False)

            cost_val_stat_v += np.mean(cost_val_neg)
            acc_stat_v += np.mean(np.argmax(u[:, -output_size:], 1) != y_idx)
            u[:, :input_size] = x_v

            # u_p[dindex*batch_size: (dindex + 1)*batch_size] = u
            u_p_v = u

    print(cost_val_stat_v / n_valid, acc_stat_v / n_valid)
    return cost_val_stat_v / n_valid
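
# Illustrative call with placeholder hyperparameter values (the keys match what
# the function reads; the numbers echo Examples #5 and #8, not a tuned config):
hyperparameters = {
    "epsilon": 1.0, "hidden_size": 100, "lrate0": 0.05,
    "lrate1": 0.01, "n_neg": 20, "n_pos": 4, "beta": 1.0,
}
val_loss = run_then_return_val_loss(10, hyperparameters)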