from datetime import datetime

import torch
from torch.utils.data import DataLoader, TensorDataset

# BayesianMLP, train, eval_1d_regression, get_toy_data and project_dir
# come from elsewhere in this repo.


def train_1d_regression():
    dim_h = 20
    n_layers = 2
    activation = "rbf"
    log_dir = (
        project_dir
        / f"runs/individual/{datetime.now().strftime('%Y%m%d_%H%M%S')}-act_{activation}"
        f"-dim_h_{dim_h}"
    )
    x_train, y_train, x_val, y_val, x_all, y_all = get_toy_data(num_samples=70, sigma=0.1)
    dataloader_train = DataLoader(TensorDataset(x_train, y_train), batch_size=70, shuffle=True)
    model = BayesianMLP(
        dim_in=1,
        dim_out=1,
        dim_h=dim_h,
        n_layers=n_layers,
        prior_type="mixture",
        # prior_sigma=prior_sigma,
        prior_pi=0.5,
        prior_sigma_1=9.0,
        prior_sigma_2=0.01,
        posterior_rho_init=-3.0,
        activation=activation,
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    return train(
        model,
        optimizer,
        dataloader_train,
        n_epochs=250,
        log_dir=log_dir,
        evaluate_func=eval_1d_regression,
        evaluate_data=(x_train, y_train, x_val, y_val, x_all, y_all, 20),
        model_noise_var=1.0,
        M=70,
    )
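# get_toy_data for this script lives elsewhere in the repo; below is a minimal
# sketch of a compatible generator, assuming the classic noisy-sinusoid toy
# regression problem (the functional form and the 80/20 split are assumptions).
def get_toy_data_sketch(num_samples=70, sigma=0.1, seed=0):
    g = torch.Generator().manual_seed(seed)
    # Dense grid for plotting the "true" function over a wider range.
    x_all = torch.linspace(-0.2, 0.8, 200).unsqueeze(-1)
    y_all = x_all + 0.3 * torch.sin(2 * torch.pi * x_all)
    # Training inputs on [0, 0.5] with Gaussian observation noise of std sigma.
    x = torch.rand(num_samples, 1, generator=g) * 0.5
    y = x + 0.3 * torch.sin(2 * torch.pi * x) + sigma * torch.randn(num_samples, 1, generator=g)
    n_val = num_samples // 5
    return x[n_val:], y[n_val:], x[:n_val], y[:n_val], x_all, y_all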
import numpy as np

# poisson, one_hot and get_toy_data come from elsewhere in this repo.


def poisson_samples(x_values, Tsize, lambda_max, dt):
    n_samples = x_values.shape[0]
    input_size = x_values.shape[1]
    out = np.zeros((n_samples, Tsize, input_size))
    for ti in range(Tsize):
        out[:, ti, :] = poisson(lambda_max * x_values, dt)
    return out

#############################################

x_values, y_values = get_toy_data()
y_uniq = np.unique(y_values)
y_hot = one_hot(y_values, len(y_uniq))

n_train = 4 * x_values.shape[0] // 5
n_test = 1 * x_values.shape[0] // 5

batch_size = 400
n_train_batches = n_train // batch_size
n_test_batches = n_test // batch_size

T = 50.0
dt = 1.0
num_steps = int(T / dt)
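# The poisson() helper called above is defined elsewhere; a minimal sketch under
# the usual Bernoulli approximation of a Poisson process: a unit emits a spike
# in a bin of width dt with probability rate * dt (valid while rate * dt << 1).
def poisson_sketch(rates, dt):
    return (np.random.random(rates.shape) < rates * dt).astype(np.float64)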
import optuna


def objective(trial: optuna.Trial):
    dim_h = 512
    n_layers = 1
    activation = "rbf"
    sigma_model = 0.1
    num_samples = 70
    # sigma_prior = trial.suggest_float("prior_sigma", low=1e-3, high=10.0, log=True)
    # M = trial.suggest_int("M", low=1, high=200, log=True)
    model_noise_var = 1.0
    prior_sigma_1 = trial.suggest_float("prior_sigma_1", low=1.0, high=20.0, log=True)
    # prior_sigma_2 = 1.0
    prior_sigma_2 = trial.suggest_float("prior_sigma_2", low=1e-3, high=1.0, log=True)
    # prior_pi = 1.0
    prior_pi = trial.suggest_float("prior_pi", low=0.0, high=1.0)
    posterior_rho_init = trial.suggest_float("posterior_rho_init", low=-5.0, high=-1.0)
    # posterior_rho_init = -2.0
    log_dir = (
        project_dir
        / f"runs/{trial.study.study_name}/trial_{trial.number}-dim_h_{dim_h}-n_layers_{n_layers}-act_{activation}-"
        # f"sigma_{sigma_prior:.2f}"
        f"s1_{prior_sigma_1:.2f}-s2_{prior_sigma_2:.2f}-pi_{prior_pi:.2f}-scale_{posterior_rho_init}"
    )
    x_train, y_train, x_val, y_val, x_all, y_all = get_toy_data(
        num_samples=num_samples, sigma=sigma_model)
    dataloader_train = DataLoader(
        TensorDataset(x_train, y_train), batch_size=num_samples, shuffle=True)
    model = BayesianMLP(
        dim_in=1,
        dim_out=1,
        dim_h=dim_h,
        n_layers=n_layers,
        prior_type="mixture",
        # prior_sigma=prior_sigma,
        prior_pi=prior_pi,
        prior_sigma_1=prior_sigma_1,
        prior_sigma_2=prior_sigma_2,
        posterior_rho_init=posterior_rho_init,
        activation=activation,
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
    return train(
        model,
        optimizer,
        dataloader_train,
        n_epochs=150,
        log_dir=log_dir,
        evaluate_func=eval_1d_regression,
        evaluate_data=(x_train, y_train, x_val, y_val, x_all, y_all, 50),
        model_noise_var=model_noise_var,
        M=1,
    )
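# Launching the search is standard Optuna; the study name and trial budget
# below are arbitrary illustrative choices.
if __name__ == "__main__":
    study = optuna.create_study(study_name="bbb_1d_regression", direction="minimize")
    study.optimize(objective, n_trials=100)
    print(study.best_params)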
Tsize = int(T / dt + dt)  # one extra step beyond T / dt

lambda_max = 200.0 / 1000.0  # 1/ms
P0 = 20.0 / lambda_max
P1 = 20.0 / (lambda_max * lambda_max)
# alt
P0 = P1
P1 = P1 / 10.0

learning_rate = 3 * 0.21

act = SigmoidActivation()

x_values, y_values = get_toy_data(seed=2)
n_train = 4 * x_values.shape[0] // 5
n_valid = 1 * x_values.shape[0] // 5

y_uniq = np.unique(y_values)
y_hot = one_hot(y_values, len(y_uniq))

input_size = x_values.shape[1]
input_len = x_values.shape[0]
hidden_size = 300
output_size = len(y_uniq)

batch_size = 500
n_train_batches = n_train // batch_size
n_valid_batches = n_valid // batch_size

batch_to_listen = 1
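# SigmoidActivation is defined elsewhere in the repo; a minimal sketch of what
# it plausibly looks like, assuming the activation objects are callables with a
# derivative method (the `deriv` name is an assumption):
class SigmoidActivationSketch(object):
    def __call__(self, x):
        return 1.0 / (1.0 + np.exp(-x))

    def deriv(self, x):
        s = self.__call__(x)
        return s * (1.0 - s)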
def run(W, p_init):
    tmp_dir = "/home/alexeyche/bm"
    # [os.remove(pj(tmp_dir, f)) for f in os.listdir(tmp_dir) if f[-4:] == ".png"]
    # np.random.seed(5)

    input_size = 2
    hidden_size = 100
    output_size = 2
    batch_size = 50
    epsilon = 1.0
    beta = 1.0
    n_neg = 20
    n_pos = 4

    net_size = input_size + hidden_size + output_size
    sl0, sl1, sl2 = (
        input_size,
        input_size + hidden_size,
        input_size + hidden_size + output_size,
    )

    # W = (W + W.T) / 2.0
    bias = np.zeros((net_size,))

    def make_lrate_matrices(lrate01, lrate12):
        w_lrates = np.zeros(W.shape)
        w_lrates[:sl0, sl0:sl1] = lrate01
        w_lrates[sl0:sl1, :sl0] = lrate01
        w_lrates[sl0:sl1, sl1:sl2] = lrate12
        w_lrates[sl1:sl2, sl0:sl1] = lrate12
        b_lrates = np.zeros(bias.shape)
        b_lrates[sl0:sl1] = lrate01
        b_lrates[sl1:sl2] = lrate12
        return w_lrates, b_lrates

    def save_params(W, bias):
        import pickle
        biases_values = [bias[:sl0], bias[sl0:sl1], bias[sl1:sl2]]
        weights_values = [W[:sl0, sl0:sl1], W[sl0:sl1, sl1:sl2]]
        to_dump = (
            biases_values,
            weights_values,
            {"hidden_sizes": [hidden_size], "batch_size": batch_size},
            {"training error": [], "validation error": []},
        )
        with open("/home/alexeyche/distr/Towards-a-Biologically-Plausible-Backprop/net1.save", "wb") as f:
            pickle.dump(to_dump, f, protocol=pickle.HIGHEST_PROTOCOL)

    # save_params(W, bias)

    w_lrates, b_lrates = make_lrate_matrices(0.05, 0.01)

    act = ClipActivation()
    model = ExpDecayHopfield(act, W, bias)
    cost = MseCost()

    x_values, y_values = get_toy_data(seed=2)
    # shs(x_values, labels=y_values)

    n_batches = x_values.shape[0] // batch_size

    u = p_init  # np.zeros((batch_size, net_size))

    index = 0
    x_v = get_batch(x_values, index, batch_size=batch_size)
    y_idx = get_batch(y_values, index, batch_size=batch_size)
    y_v = one_hot(y_idx, 2)

    x_t, y_t = x_v, y_v
    output_size = y_t.shape[-1]
    input_size = x_t.shape[-1]

    x = u.copy()

    print("W01:", np.mean(W[:sl0, sl0:sl1]))
    print("W12:", np.mean(W[sl0:sl1, sl1:sl2]))

    dx_acc, x_acc = [], []
    x[:, :input_size] = x_t
    dx = model.dynamics(x)
    dx = -dx
    dx_acc.append(dx)
    x_acc.append(x)

    # Free ("negative") phase: relax with only the inputs clamped, then the
    # weakly clamped ("positive") phase nudged toward the targets by beta.
    u, cost_val_neg, V_neg = model.run(
        u, n_neg, x_v, y_v, cost, epsilon=epsilon, clamped=False
    )
    u_pos, cost_val_pos, V_pos = model.run(
        u, n_pos, x_v, y_v, cost, epsilon=epsilon, clamped=True, beta=beta
    )

    u[:, :input_size] = x_t
    u_pos[:, :input_size] = x_t

    dWn, dbn = model.grad(u)
    dWp, dbp = model.grad(u_pos)

    # Contrastive update between the two phases, scaled by 1 / beta.
    dW = 2.0 * (dWp - dWn) / beta / batch_size
    db = (dbp - dbn) / beta / batch_size

    return V_neg, V_pos, dW, db
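# A possible driver for run(); the symmetrized random initialization mirrors
# the other scripts in this repo, while the zero initial state and the sizes
# below are assumptions matching the constants hard-coded inside run().
net_size = 2 + 100 + 2               # input + hidden + output, as in run()
W0 = initialize_layer(net_size, net_size)
W0 = (W0 + W0.T) / 2.0               # Hopfield dynamics assume symmetric weights
p0 = np.zeros((50, net_size))        # batch_size x net_size initial state
V_neg, V_pos, dW, db = run(W0, p0)
print("mean dW:", np.mean(dW), "mean db:", np.mean(db))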
import sys

import numpy as np
import xgboost as xgb

from datasets import get_toy_data

x_values, y_values = get_toy_data()

n_train = 4 * x_values.shape[0] // 5
n_valid = 1 * x_values.shape[0] // 5

x_train = x_values[:n_train]
x_test = x_values[n_train:(n_train + n_valid)]
y_train = y_values[:n_train]
y_test = y_values[n_train:(n_train + n_valid)]

xg_train = xgb.DMatrix(x_train, label=y_train)
xg_test = xgb.DMatrix(x_test, label=y_test)

param = {}
# use softmax multi-class classification
param['objective'] = 'multi:softmax'
# param['objective'] = 'binary:logistic'
param['eta'] = 0.1
param['max_depth'] = 6
param['verbosity'] = 0  # quiet training output
param['nthread'] = 4
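# Completing the script along the lines of the stock xgboost multiclass demo:
# multi:softmax needs num_class, and the 50-round budget is an arbitrary choice.
param['num_class'] = len(np.unique(y_values))

watchlist = [(xg_train, 'train'), (xg_test, 'test')]
bst = xgb.train(param, xg_train, num_boost_round=50, evals=watchlist)

pred = bst.predict(xg_test)
print('test error: {:.4f}'.format(np.mean(pred != y_test)))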
import numpy as np

from models import Hopfield, ExpDecayHopfield, initialize_layer
from activation import *
from cost import MseCost
from opt import *
from datasets import get_toy_data


def get_batch(d, idx, batch_size):
    return d[idx * batch_size:(idx + 1) * batch_size]


def one_hot(y, y_size):
    y_oh = np.zeros((y.shape[0], y_size))
    y_oh[np.arange(y.shape[0]), y] = 1.0
    return y_oh


x_values, y_values = get_toy_data(seed=2)

tmp_dir = "/home/alexeyche/bm"
# [os.remove(pj(tmp_dir, f)) for f in os.listdir(tmp_dir) if f[-4:] == ".png"]
# np.random.seed(8)

input_size = x_values.shape[1]
hidden_size = 100
output_size = len(np.unique(y_values))
batch_size = 100
epsilon = 1.0
beta = 1.0
n_neg = 20
n_pos = 4
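# initialize_layer comes from models.py; a hypothetical sketch under the
# assumption that it draws Glorot-style uniform weights (the actual scheme in
# the repo may differ):
def initialize_layer_sketch(n_in, n_out, rng=np.random):
    bound = np.sqrt(6.0 / (n_in + n_out))
    return rng.uniform(-bound, bound, size=(n_in, n_out))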
def run_then_return_val_loss(num_iters, hyperparameters):
    epsilon = hyperparameters["epsilon"]
    hidden_size = hyperparameters["hidden_size"]
    lrate0 = hyperparameters["lrate0"]
    lrate1 = hyperparameters["lrate1"]
    n_neg = hyperparameters["n_neg"]
    n_pos = hyperparameters["n_pos"]
    beta = hyperparameters["beta"]

    x_values, y_values = get_toy_data(seed=2)

    input_size = x_values.shape[1]
    output_size = len(np.unique(y_values))
    batch_size = 100

    net_size = input_size + hidden_size + output_size
    sl0, sl1, sl2 = (
        input_size,
        input_size + hidden_size,
        input_size + hidden_size + output_size,
    )

    def make_feed_forward(w):
        # Zero out self-connections within each layer block and any direct
        # input-to-output coupling, leaving only adjacent-layer weights.
        w[:sl0, :sl0] = 0.0
        w[sl0:sl1, sl0:sl1] = 0.0
        w[sl1:sl2, sl1:sl2] = 0.0
        w[:sl0, sl1:sl2] = 0.0
        w[sl1:sl2, :sl0] = 0.0
        return w

    def make_lrate_matrices(lrate01, lrate12):
        w_lrates = np.zeros(W.shape)
        w_lrates[:sl0, sl0:sl1] = lrate01
        w_lrates[sl0:sl1, :sl0] = lrate01
        w_lrates[sl0:sl1, sl1:sl2] = lrate12
        w_lrates[sl1:sl2, sl0:sl1] = lrate12
        b_lrates = np.zeros(bias.shape)
        # b_lrates[:sl0] = lrate01
        b_lrates[sl0:sl1] = lrate01
        b_lrates[sl1:sl2] = lrate12
        return w_lrates, b_lrates

    W = initialize_layer(net_size, net_size)
    W = (W + W.T) / 2.0
    W = make_feed_forward(W)
    bias = np.zeros((net_size,))

    act = ClipActivation()
    model = ExpDecayHopfield(act, W, bias)
    cost = MseCost()

    n_train = (4 * x_values.shape[0] // 5) // batch_size
    n_valid = (1 * x_values.shape[0] // 5) // batch_size
    n_batches = x_values.shape[0] // batch_size

    u_p = np.zeros((batch_size, net_size))
    u_p_v = np.zeros((batch_size, net_size))

    w_lrates, b_lrates = make_lrate_matrices(lrate0, lrate1)
    opt = SGDOpt([w_lrates, b_lrates])
    opt.init(model.W, model.b)

    for e in range(num_iters):
        cost_val_stat_t, dW_stat_t, db_stat_t, acc_stat_t = 0.0, 0.0, 0.0, 0.0
        mom_acc, db_acc = np.zeros(model.W.shape), np.zeros(model.b.shape)

        # for index in np.random.permutation(xrange(n_train)):
        for index in range(n_train):
            # u = u_p[index * batch_size:(index + 1) * batch_size]
            u = u_p.copy()

            x_v = get_batch(x_values, index, batch_size=batch_size)
            y_idx = get_batch(y_values, index, batch_size=batch_size)
            y_v = one_hot(y_idx, output_size)

            u, cost_val_neg, V_neg = model.run(
                u, n_neg, x_v, y_v, cost, epsilon=epsilon, clamped=False
            )
            u_pos, _, V_pos = model.run(
                u, n_pos, x_v, y_v, cost, epsilon=epsilon, clamped=True, beta=beta
            )

            u[:, :input_size] = x_v
            u_pos[:, :input_size] = x_v

            dWn, dbn = model.grad(u)
            dWp, dbp = model.grad(u_pos)

            dW = 2.0 * (dWp - dWn) / beta / batch_size
            db = (dbp - dbn) / beta / batch_size

            model.W, model.b = opt.update((model.W, dW), (model.b, db))

            cost_val_stat_t += np.mean(cost_val_neg)
            dW_stat_t += np.mean(dW)
            db_stat_t += np.mean(db)
            acc_stat_t += np.mean(np.argmax(u[:, -output_size:], 1) != y_idx)

            # u_p[index * batch_size:(index + 1) * batch_size] = u
            u_p = u
            # shm(u[:,input_size:], u_pos[:,input_size:], file=pj(tmp_dir, "{}_{}_u.png".format(e, index)))

        cost_val_stat_v, acc_stat_v = 0.0, 0.0
        for index in range(n_valid):
            dindex = n_train + index
            u = u_p_v.copy()  # u_p[dindex * batch_size:(dindex + 1) * batch_size]

            x_v = get_batch(x_values, dindex, batch_size=batch_size)
            y_idx = get_batch(y_values, dindex, batch_size=batch_size)
            y_v = one_hot(y_idx, output_size)

            u, cost_val_neg, V_neg = model.run(
                u, n_neg, x_v, y_v, cost, epsilon=epsilon, clamped=False
            )

            cost_val_stat_v += np.mean(cost_val_neg)
            acc_stat_v += np.mean(np.argmax(u[:, -output_size:], 1) != y_idx)

            u[:, :input_size] = x_v
            # u_p[dindex * batch_size:(dindex + 1) * batch_size] = u
            u_p_v = u

        print(cost_val_stat_v / n_valid, acc_stat_v / n_valid)

    return cost_val_stat_v / n_valid
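# A minimal random-search driver for run_then_return_val_loss, sketched under
# assumptions: the sampling ranges below are illustrative, not values from this
# repo; only the (num_iters, hyperparameters) signature is taken from above.
def sample_hyperparameters(rng=np.random):
    return {
        "epsilon": rng.uniform(0.1, 1.0),
        "hidden_size": int(rng.choice([50, 100, 200])),
        "lrate0": 10.0 ** rng.uniform(-3.0, -1.0),
        "lrate1": 10.0 ** rng.uniform(-3.0, -1.0),
        "n_neg": int(rng.choice([10, 20, 50])),
        "n_pos": int(rng.choice([2, 4, 8])),
        "beta": rng.uniform(0.5, 2.0),
    }


results = []
for _ in range(10):
    hp = sample_hyperparameters()
    results.append((run_then_return_val_loss(20, hp), hp))
best_loss, best_hp = min(results, key=lambda r: r[0])
print("best val loss:", best_loss, "with", best_hp)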