def objective(args):
    """Hyperopt-style objective: worst validation cost over five training runs.

    Trains five independent networks with the hyperparameters in ``args``
    (weight init kind/scale, learning rate, neuron type) and returns the
    maximum mean validation RMS cost, so the search optimizes the worst case.
    Relies on module-level ``sizes``, ``rng``, ``alpha``, ``momentum``,
    ``batch_fn``, ``Xvalid`` and ``Yvalid``.
    """
    label = arg_string(args)
    worst = -np.inf
    for _ in range(5):
        f, df = static_f_df(tau_rc=0.05, **args['neuron_type'])
        weights = initial_weights(
            sizes, kind=args['w_kind'], scale=args['w_scale'], rng=rng)

        # --- train one network, then score it on the validation set
        network = Network(weights, f=f, df=df, biases=None)
        learner = Learner(network, squared_cost, rms_error,
                          eta=args['eta'], alpha=alpha, momentum=momentum)
        learner.train(1, batch_fn, verbose=0)

        y = learner.network.predict(Xvalid)
        worst = max(worst, rms(y - Yvalid, axis=1).mean())

    print("%s: %0.3e" % (label, worst))
    return worst
def run_trial(f_dfs):
    """Train a BP learner and a DFA (skip) learner for each (f, df) pair.

    Generates fresh training data, builds one shared set of initial weights
    and direct feedback matrices, then trains both learners for one epoch per
    nonlinearity and prints the resulting weight norms. Returns a list of
    [bp_learner, fas_learner] pairs, one per entry of ``f_dfs``.
    """
    X = genX(n)
    Y = genY(X)
    batch_fn = make_flat_batch_fn(X, Y, n_per_batch)
    # Validation data is generated even though it is unused below: the calls
    # advance ``rng`` state, so removing them would change every later draw.
    Xvalid = genX(1000)
    Yvalid = genY(Xvalid)

    # --- initial weights (shared by both learners so the comparison is fair)
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)

    def gen_feedback(shape):
        # Orthogonal random feedback matrix, row-mean normalized.
        return initial_w(shape, rng=rng, kind='ortho',
                         normkind='rightmean', scale=0.3)

    directBs = [gen_feedback((dout, dhid)) for dhid in dhids]

    def test_derivative(f, df):
        # Build, train, and report both learners for one nonlinearity.
        def make_network(**kwargs):
            return Network(weights, f=f, df=df, biases=None, noise=0, **kwargs)

        bp_learner = BPLearner(make_network(), squared_cost, rms_error,
                               eta=eta, alpha=alpha, name='BP')
        bp_learner.weight_norms = []

        fas_learner = FASkipLearner(make_network(), squared_cost, rms_error,
                                    eta=eta, alpha=alpha, name='DFA')
        fas_learner.Bs = directBs

        learners = [bp_learner, fas_learner]
        for learner in learners:
            learner.train(1, batch_fn)
        for learner in learners:
            print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                            for i, w in enumerate(learner.network.weights)))
        return learners

    return [test_derivative(f, df) for f, df in f_dfs]
def run_trial(f_dfs):
    """Train a BP learner and an FA (skip) learner for each (f, df) pair.

    All learners in one trial share initial weights, feedback matrices, and a
    batch-RNG seed, so differences in outcome are attributable to the learning
    rule and nonlinearity alone. Returns a list of [bp_learner, fas_learner]
    pairs, one per entry of ``f_dfs``.
    """
    trial_seed = rng.randint(2**30)

    # --- initial weights (shared by both learners so the comparison is fair)
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)

    def gen_feedback(shape):
        # Orthogonal random feedback matrix, row-mean normalized.
        return initial_w(shape, kind='ortho', normkind='rightmean',
                         scale=0.2, rng=rng)

    directBs = [gen_feedback((dout, dhid)) for dhid in dhids]

    def test_derivative(f, df):
        # Build and train both learners for one nonlinearity.
        def make_network(**kwargs):
            return Network(weights, f=f, df=df, biases=None, noise=0, **kwargs)

        bp_learner = BPLearner(make_network(), cost, error,
                               eta=eta, alpha=alpha, name='BP')
        bp_learner.weight_norms = []

        fas_learner = FASkipLearner(make_network(), cost, error,
                                    eta=eta, alpha=alpha, name='FA')
        fas_learner.Bs = directBs

        learners = [bp_learner, fas_learner]
        for learner in learners:
            # Re-seed per learner so every learner sees the same batch order.
            batch_fn = make_random_batch_fn(
                trainX, trainY, n_per_batch,
                rng=np.random.RandomState(trial_seed))
            learner.train(epochs, batch_fn, test_set=test_set)
        return learners

    return [test_derivative(f, df) for f, df in f_dfs]
def objective(args):
    """Hyperopt objective: trimmed-mean normalized validation cost over 5 runs.

    Trains five networks with random feedback weights (``learner.Bs``) using
    the hyperparameters in ``args``, drops the best and worst run, and returns
    the mean of the remaining normalized validation RMS costs. Reports
    STATUS_FAIL when the loss is not finite so hyperopt discards the point.
    Relies on module-level ``sizes``, ``rng``, ``batch_fn``, ``dout``,
    ``dhids``, ``Xvalid``, ``Yvalid`` and ``Yvalidrms``.
    """
    label = arg_string(args)
    alpha = 0  # momentum term disabled for this search
    costs = []
    for _ in range(5):
        f, df = static_f_df(tau_rc=0.05, **args['neuron_type'])
        weights = initial_weights(sizes, kind=args['w_kind'],
                                  scale=args['w_scale'], rng=rng)

        # --- learners
        network = Network(weights, f=f, df=df, biases=None)
        learner = Learner(network, squared_cost, rms_error,
                          eta=args['eta'], alpha=alpha)
        learner.Bs = [initial_w((dout, dhid), kind=args['b_kind'],
                                normkind='rightmean')
                      for dhid in dhids]
        learner.train(1, batch_fn, verbose=0)

        y = learner.network.predict(Xvalid)
        costs.append(rms(y - Yvalid, axis=1).mean() / Yvalidrms)

    trimmed = sorted(costs)[1:-1]  # drop largest and smallest
    loss = np.mean(trimmed)
    status = hyperopt.STATUS_OK if np.isfinite(loss) else hyperopt.STATUS_FAIL
    print("%s: %0.3e" % (label, loss))
    return dict(loss=loss, status=status)
# n_per_batch = 2 # n_per_batch = 5 # n_per_batch = 10 n_per_batch = 20 # n_per_batch = 100 # batch_fn = make_flat_batch_fn(trainX, trainY, n_per_batch) batch_fn = make_random_batch_fn(trainX, trainY, n_per_batch, rng=np.random.RandomState(5)) sizes = [din] + dhids + [dout] # --- initial weights weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng) print(", ".join("||W%d|| = %0.3f" % (i, norm(w)) for i, w in enumerate(weights))) # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.2) # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.4) # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.1) genB = lambda shape: initial_w( shape, kind='identity', normkind='rightmean', scale=0.2) Bs = [genB((d1, d0)) for d0, d1 in zip(dhids, dhids[1:] + [dout])] Bs_direct = [genB((dout, dhid)) for dhid in dhids] def combine_Bs(Bs): Bs_combined = [Bs[-1]] for B in Bs[-2::-1]: Bs_combined.insert(0, np.dot(Bs_combined[0], B))
# Flag consumed elsewhere; its reader is not visible in this chunk.
# NOTE(review): presumably toggles a "cheating" variant of difference target
# propagation -- confirm against the consumer.
dtp_cheat = False


def cost(y, yinds, pointers=pointers):
    # Squared-error cost on pointer representations, indexed by class inds.
    # ``pointers`` is bound at definition time to the module-level value.
    return pointer_squared_cost_on_inds(y, yinds, pointers)


def error(y, yinds, pointers=pointers):
    # Classification error on pointer representations, indexed by class inds.
    return pointer_class_error_on_inds(y, yinds, pointers)


din = trainX.shape[1]  # input dimensionality taken from the training data
sizes = [din] + dhids + [dout]
batch_fn = make_flat_batch_fn(trainX, trainY, n_per_batch)

# --- initial weights (alternative kept from tuning runs)
# weights = initial_weights(sizes, kind='uniform', scale=0.001, rng=rng)
weights = initial_weights(sizes, kind='ortho', scale=1, rng=rng)

# Static LIF-linear nonlinearity and its derivative.
f, df = static_f_df('liflinear', tau_rc=0.05, amplitude=0.024)

# --- learning rate (alternatives kept from tuning runs)
# eta = 1e-1
# eta = 5e-2
# eta = 2e-2
# eta = 1e-2
eta = 4e-3
# eta = 1e-3
# eta = 5e-4
# eta = 2e-4
# eta = 1e-4
# eta = 1e-5
# eta = 0
# --- XOR dataset: inputs are +/-1 valued; targets are the row-wise parity.
Xtrain = 2. * rng.randint(0, 2, size=(ntrain, din)) - 1
Ytrain = xor_reduce(Xtrain, axis=1, keepdims=1)
Xtest = 2. * rng.randint(0, 2, size=(ntest, din)) - 1
Ytest = xor_reduce(Xtest, axis=1, keepdims=1)

cost = squared_cost
# error = rms_error
error = binary_classification_error

# --- learn
# Initial weights (alternative scales kept from tuning runs).
# weights = initial_weights([din] + dhids + [dout], kind='uniform', scale=0.1)
# weights = initial_weights([din] + dhids + [dout], kind='uniform', scale=0.03, rng=rng)
weights = initial_weights([din] + dhids + [dout], kind='uniform', scale=0.1, rng=rng)
# weights = initial_weights([din] + dhids + [dout], kind='uniform', scale=0.2, rng=rng)
# weights = initial_weights([din] + dhids + [dout], kind='uniform', scale=0.4, rng=rng)
# biases = 0


def decoder(n):
    # Fixed decoder: maps n units down to n // 4 outputs. Output i reads a
    # block of four consecutive units with weights 0.5 * [1, 1, -1, -1].
    W = np.zeros((n, n // 4))
    for i in range(n // 4):
        W[4 * i:4 * (i + 1), i] = 0.5 * np.array([1, 1, -1, -1])
    return W


def encoder(n):
    # Fixed encoder: maps n // 2 inputs up to n units.
    # NOTE(review): body appears truncated at this chunk boundary -- only the
    # zero allocation is visible; the fill and return are missing. Confirm
    # against the full file.
    W = np.zeros((n // 2, n))
# eta = 2e-3 # eta = 5e-3 t_train = 5. t_test = 0.001 t0 = 0 t1 = t0 + t_train t2 = t1 + t_test din = Xtrain.shape[1] dout = n_labels sizes = [din] + dhids + [dout] # --- model weights = initial_weights([2*din] + dhids + [dout], kind='uniform', scale=4e-3, rng=rng) # weights = initial_weights([2*din] + dhids + [dout], kind='uniform', scale=4e-8, rng=rng) # neuron_type = nengo.LIF(tau_rc=0.05, amplitude=0.0014) neuron_type = nengo.LIF(tau_rc=0.05, amplitude=0.024) synapse = nengo.synapses.Alpha(0.003) model = nengo.Network() model.config[nengo.Ensemble].neuron_type = neuron_type model.config[nengo.Connection].synapse = synapse pdt = 0.01 network_args = dict(t0=t0, t1=t1, eta=eta, seed=2, n_output=20, n_error=20, o_encoders=eye_encoders(dout), e_encoders=eye_encoders(dout), e_intercepts=Uniform(0, 0.8), pdt=pdt)
def objective(args):
    """Hyperopt objective for the spiking feedback-alignment network.

    Builds a random linear-map dataset, trains an ``FATwoStepNetwork`` inside
    a nengo simulation for ``t_train`` seconds with presentation time
    ``args['prestime']`` and learning rate ``args['eta']``, saves the trial
    data to an .npz file, and returns the normalized test RMS cost. Relies on
    module-level ``t_train``, ``din``, ``dhids``, ``dout``, ``rng``,
    ``neuron_type``, ``synapse``, and ``filedir``.
    """
    eta = args['eta']
    prestime = args['prestime']
    # Number of examples needed to fill the training time at this prestime.
    n_examples = int(np.ceil(t_train / prestime))

    # dataset: random inputs mapped through a fixed orthogonalized matrix T
    T = orthogonalize(rng.normal(size=(din, dout)))
    genX = lambda n: rng.normal(scale=0.5, size=(n, din))
    genY = lambda X: np.dot(X, T)
    X = genX(n_examples)
    Y = genY(X)
    # Present each (x, y) pair for `prestime` seconds of simulated time.
    x_process = nengo.processes.PresentInput(X, prestime)
    y_process = nengo.processes.PresentInput(Y, prestime)
    Xtest = genX(1000)
    Ytest = genY(Xtest)
    Ytestrms = rms(Ytest, axis=1).mean()  # normalizer for the reported cost

    weights = initial_weights([2 * din] + dhids + [dout], kind='gaussian', scale=5e-4, rng=rng)

    fa_args = dict(eta=eta, seed=2, n_output=20, n_error=20, o_encoders=eye_encoders(dout), e_encoders=eye_encoders(dout), e_intercepts=Uniform(0, 0.8), b_kind='gaussian', b_scale=1.7)

    # Network-wide defaults for all ensembles/connections created below.
    model = nengo.Network()
    model.config[nengo.Ensemble].neuron_type = neuron_type
    model.config[nengo.Connection].synapse = synapse
    with model:
        x = nengo.Node(x_process)
        y = nengo.Node(y_process)
        xe = Encoder(x, seed=1)
        learner = FATwoStepNetwork(xe.output, y, weights, **fa_args)
        xp = nengo.Probe(x)
        yp = nengo.Probe(y)
        xep = nengo.Probe(xe.output)

    with nengo.Simulator(model) as sim:
        sim.run(t_train)

    # Evaluate on held-out data using the trained simulator state.
    XEtest = xe.encode(Xtest, sim=sim)
    Ztest = learner.forward(sim, XEtest)
    cost = rms(Ztest - Ytest, axis=1).mean() / Ytestrms

    # save
    dt = sim.dt
    t = sim.trange()
    # NOTE: x, xe, y are rebound from model objects to probe data here; the
    # original objects are no longer needed past this point.
    x = sim.data[xp]
    xe = sim.data[xep]
    y = sim.data[yp]
    z = sim.data[learner.yp]
    data = dict(
        eta=eta, prestime=prestime, dt=dt, t_train=t_train,
        din=din, dhids=dhids, dout=dout,
        T=T, X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, XEtest=XEtest, Ztest=Ztest,
        # t=t, x=x, xe=xe, y=y, z=z,
        cost=cost)
    rargs = '_'.join('%s=%r' % (k, v) for k, v in args.items())
    filename = os.path.join(filedir, 'trial_%s.npz' % (rargs))
    np.savez(filename, **data)
    print("Saved %r" % filename)

    # result: flag non-finite losses so hyperopt discards the trial
    status = hyperopt.STATUS_OK if np.isfinite(cost) else hyperopt.STATUS_FAIL
    sargs = arg_string(args)
    print("%s: %0.3e" % (sargs, cost))
    return dict(loss=cost, status=status)
# f, df = static_f_df('liflinear', tau_rc=0.05, amplitude=1./50) # f, df = static_f_df('liflinear', tau_rc=0.05, amplitude=0.005) f, df = static_f_df('liflinear', tau_rc=0.05, amplitude=0.01) # f, df = static_f_df('liflinear', tau_rc=0.05, amplitude=0.004) # --- initial weights # weights = initial_weights(sizes, kind='uniform', scale=0.01, rng=rng) # weights = initial_weights(sizes, kind='uniform', scale=0.2, rng=rng) # weights = initial_weights(sizes, kind='ortho', scale=0.003, rng=rng) # weights = initial_weights(sizes, kind='ortho', scale=0.06, rng=rng) # weights = initial_weights(sizes, kind='ortho', scale=0.3, rng=rng) # weights = initial_weights(sizes, kind='ortho', rng=rng) # weights = initial_weights(sizes, kind='ortho', scale=2, rng=rng) weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng) # weights = initial_weights(sizes, kind='gaussian', scale=3e-2, rng=rng) # weights = initial_weights(sizes, kind='gaussian', scale=3e-3, rng=rng) # weights = initial_weights(sizes, kind='uniform', scale=1.7e-2, rng=rng) # lsuv(X, weights, f, verbose=1) # lsuv(X, weights, f, target_input=True, target_std=5, verbose=1) # lsuv(X, weights, f, target_input=True, target_std=2, verbose=1) print(", ".join("||W%d|| = %0.3f" % (i, norm(w)) for i, w in enumerate(weights))) # --- network noise = 0 # noise = 1.
# Targets are a fixed linear map T of the inputs.
genY = lambda X: np.dot(X, T)

freq = (0.5 / np.pi) / prestime  # angular frequency = 1. / prestime
# Presample a band-limited white signal covering the whole training time.
p = nengo.processes.WhiteSignal(n_examples * prestime, freq, rms=0.5)
X = p.run(n_examples * prestime, d=din, dt=0.001, rng=rng)
Y = np.dot(X, T)
# Index the presampled signal by simulation time (sample step 0.001 s),
# wrapping around at the end.
x_process = lambda t: X[int(t / 0.001) % len(X)]
y_process = lambda t: Y[int(t / 0.001) % len(Y)]

# NOTE(review): genX is not defined in this chunk -- presumably defined
# earlier in the file; confirm it matches the genY defined above.
Xtest = genX(1000)
Ytest = genY(Xtest)

# --- initial weights (alternatives kept from tuning runs); first layer takes
# 2*din inputs.
# weights = initial_weights([2*din] + dhids + [dout], kind='ortho', rng=rng)
# weights = initial_weights([2*din] + dhids + [dout], kind='uniform', scale=0.2, rng=rng)
# weights = initial_weights([2*din] + dhids + [dout], kind='uniform', scale=0.4, rng=rng)
weights = initial_weights([2 * din] + dhids + [dout], kind='gaussian', scale=4.4e-4, rng=rng)
# weights = initial_weights([2*din] + dhids + [dout], kind='zeros')

# neuron_type = nengo.LIF(tau_rc=0.05, amplitude=0.005)
neuron_type = nengo.LIF(tau_rc=0.05, amplitude=0.024)
# synapse = nengo.synapses.Alpha(0.005)
synapse = nengo.synapses.Alpha(0.003)
# synapse_n = lambda n: reduce(lambda a, b: a.combine(b), [synapse] * n)

# Apply the neuron type and synapse as network-wide defaults.
model = nengo.Network()
model.config[nengo.Ensemble].neuron_type = neuron_type
model.config[nengo.Connection].synapse = synapse

# network_args = dict(t0=t0, t1=t1, eta=eta, seed=2)
# network_args = dict(n_output=20, n_error=20, t0=t0, t1=t1, eta=eta, seed=2)
# network_args = dict(n_output=40, n_error=40, t0=t0, t1=t1, eta=eta, seed=2)