def run_trial(f_dfs):
    """Run one trial: train BP and DFA learners for each (f, df) pair.

    Generates fresh train/validation data, a shared set of initial weights,
    and shared direct feedback matrices, then trains one BPLearner and one
    FASkipLearner per activation-derivative pair in ``f_dfs``.

    Returns a list of [bp_learner, fas_learner] pairs, one per entry of
    ``f_dfs``.
    """
    X = genX(n)
    Y = genY(X)
    batch_fn = make_flat_batch_fn(X, Y, n_per_batch)
    Xvalid = genX(1000)
    Yvalid = genY(Xvalid)
    # Yrms = rms(Y, axis=1).mean()
    # Yvalidrms = rms(Yvalid, axis=1).mean()

    # --- initial weights (shared by every learner in this trial)
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)
    # print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
    #                 for i, w in enumerate(weights)))

    # genB = lambda shape: initial_w(shape, rng=rng, kind='ortho',
    #                                normkind='rightmean', scale=0.2)
    def genB(shape):
        return initial_w(
            shape, rng=rng, kind='ortho', normkind='rightmean', scale=0.3)

    directBs = [genB((dout, dhid)) for dhid in dhids]

    def test_derivative(f, df):
        # One BP learner and one direct-feedback (DFA) learner on identical
        # starting weights, trained for a single epoch each.
        def get_network(**kwargs):
            return Network(weights, f=f, df=df, biases=None, noise=0, **kwargs)

        bp_learner = BPLearner(get_network(), squared_cost, rms_error,
                               eta=eta, alpha=alpha, name='BP')
        bp_learner.weight_norms = []

        fas_learner = FASkipLearner(get_network(), squared_cost, rms_error,
                                    eta=eta, alpha=alpha, name='DFA')
        fas_learner.Bs = directBs

        learners = [bp_learner, fas_learner]
        for learner in learners:
            learner.train(1, batch_fn)

        for learner in learners:
            print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                            for i, w in enumerate(learner.network.weights)))
        return learners

    results = []
    for f_df in f_dfs:
        results.append(test_derivative(*f_df))
    return results
def run_trial(f_dfs):
    """Run one trial: train BP and FA learners for each (f, df) pair.

    Shares one set of initial weights and one set of direct feedback
    matrices across all learners.  Each learner gets its own batch stream,
    but all streams are seeded with the same per-trial seed so every
    learner sees the identical batch sequence.

    Returns a list of [bp_learner, fas_learner] pairs, one per entry of
    ``f_dfs``.
    """
    trial_seed = rng.randint(2**30)

    # --- initial weights (shared by every learner in this trial)
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)
    # print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
    #                 for i, w in enumerate(weights)))

    def genB(shape):
        return initial_w(
            shape, kind='ortho', normkind='rightmean', scale=0.2, rng=rng)

    directBs = [genB((dout, dhid)) for dhid in dhids]

    def test_derivative(f, df):
        # batch_fn = make_flat_batch_fn(X, Y, n_per_batch)
        def get_network(**kwargs):
            return Network(weights, f=f, df=df, biases=None, noise=0, **kwargs)

        bp_learner = BPLearner(get_network(), cost, error,
                               eta=eta, alpha=alpha, name='BP')
        bp_learner.weight_norms = []

        fas_learner = FASkipLearner(get_network(), cost, error,
                                    eta=eta, alpha=alpha, name='FA')
        fas_learner.Bs = directBs

        learners = [bp_learner, fas_learner]
        for learner in learners:
            # Fresh RandomState from the shared trial seed, so each learner
            # trains on the same randomized batch order.
            batch_fn = make_random_batch_fn(
                trainX, trainY, n_per_batch,
                rng=np.random.RandomState(trial_seed))
            learner.train(epochs, batch_fn, test_set=test_set)

        # for learner in learners:
        #     print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
        #                     for i, w in enumerate(learner.network.weights)))
        return learners

    return [test_derivative(*f_df) for f_df in f_dfs]
def test_initial_w_ortho_angle():
    """Visual check of row-angle distributions for 'ortho' 2x2 initial_w.

    Draws many 2x2 orthogonal weight samples and histograms the planar
    angle of each of the two rows in separate subplots; for a proper
    orthogonal sampler both histograms should look uniform.
    """
    samples = [initial_w((2, 2), kind='ortho') for _ in range(100000)]
    for row, panel in ((0, 211), (1, 212)):
        angles = [np.arctan2(*w[row]) for w in samples]
        plt.subplot(panel)
        plt.hist(angles, bins=51)
    plt.show()
def test_derivative(f, df):
    """Train BP and FA-skip learners from the same initial weights.

    Uses the enclosing scope's ``weights``, ``cost``, ``error``, ``eta``,
    ``alpha``, ``epochs``, ``batch_fn``, ``test_set``, ``dout``, and
    ``dhids``.  Prints the final weight norms of each learner and returns
    the list of trained learners.
    """
    def get_network(**kwargs):
        return Network(weights, f=f, df=df, biases=None, noise=0, **kwargs)

    bp_learner = BPLearner(get_network(), cost, error,
                           eta=eta, alpha=alpha, name='BP')
    bp_learner.weight_norms = []

    # fa_learner = FALearner(
    #     get_network(), squared_cost, rms_error, eta=eta, alpha=alpha)
    # fa_learner.Bs = [initial_w((j, i), kind='ortho', scale=2)
    #                  for i, j in zip(dhids, dhids[1:] + [dout])]
    # fa_learner.bp_angles = []
    # # fa_learner.pbp_angles = []

    fas_learner = FASkipLearner(get_network(), cost, error,
                                eta=eta, alpha=alpha, name='FA')

    def genB(shape):
        return initial_w(shape, kind='ortho', normkind='rightmean', scale=0.2)

    fas_learner.Bs = [genB((dout, dhid)) for dhid in dhids]

    # learners = [bp_learner, fa_learner]
    learners = [bp_learner, fas_learner]
    for learner in learners:
        learner.train(epochs, batch_fn, test_set=test_set)

    for learner in learners:
        print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                        for i, w in enumerate(learner.network.weights)))
    return learners
def objective(args):
    """Hyperopt objective: validation cost of a feedback-alignment learner.

    Builds a fresh network for each of five repetitions with the sampled
    hyperparameters, trains one epoch, and scores normalized RMS error on
    the validation set.  The largest and smallest of the five costs are
    discarded and the rest averaged (a trimmed mean), making the objective
    robust to outlier runs.

    Returns a hyperopt result dict with ``loss`` and ``status``.
    """
    sargs = arg_string(args)
    w_kind = args['w_kind']
    w_scale = args['w_scale']
    b_kind = args['b_kind']
    # b_scale = args['b_scale']
    eta = args['eta']
    # alpha = args['alpha']
    alpha = 0
    # eta = [args['eta0'], args['eta1'], args['eta2']]

    # max_cost = -np.inf
    costs = []
    for _ in range(5):
        f, df = static_f_df(tau_rc=0.05, **args['neuron_type'])
        weights = initial_weights(sizes, kind=w_kind, scale=w_scale, rng=rng)

        # --- learners
        network = Network(weights, f=f, df=df, biases=None)
        # network = Network(weights, f=f, df=df, biases=None, noise=1.)
        learner = Learner(network, squared_cost, rms_error,
                          eta=eta, alpha=alpha)
        # learner.Bs = [initial_w((dout, dhid), kind=b_kind, scale=b_scale)
        #               for dhid in dhids]
        learner.Bs = [initial_w((dout, dhid), kind=b_kind,
                                normkind='rightmean')
                      for dhid in dhids]
        learner.train(1, batch_fn, verbose=0)

        y = learner.network.predict(Xvalid)
        costs.append(rms(y - Yvalid, axis=1).mean() / Yvalidrms)

    trimmed = sorted(costs)[1:-1]  # drop largest and smallest
    cost = np.mean(trimmed)
    status = hyperopt.STATUS_OK if np.isfinite(cost) else hyperopt.STATUS_FAIL
    print("%s: %0.3e" % (sargs, cost))
    return dict(loss=cost, status=status)
batch_fn = make_random_batch_fn(trainX, trainY, n_per_batch,
                                rng=np.random.RandomState(5))
sizes = [din] + dhids + [dout]

# --- initial weights
weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)
print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                for i, w in enumerate(weights)))

# genB = lambda shape: initial_w(shape, kind='ortho',
#                                normkind='rightmean', scale=0.2)
# genB = lambda shape: initial_w(shape, kind='ortho',
#                                normkind='rightmean', scale=0.4)
# genB = lambda shape: initial_w(shape, kind='ortho',
#                                normkind='rightmean', scale=0.1)
def genB(shape):
    return initial_w(shape, kind='identity', normkind='rightmean', scale=0.2)


# Layer-to-layer feedback matrices and direct (output-to-hidden) ones.
Bs = [genB((d1, d0)) for d0, d1 in zip(dhids, dhids[1:] + [dout])]
Bs_direct = [genB((dout, dhid)) for dhid in dhids]


def combine_Bs(Bs):
    """Collapse the chain of per-layer feedback matrices.

    Returns a list where entry i is the product of all feedback matrices
    from layer i up to the output, i.e. the effective direct feedback each
    hidden layer receives through the chained Bs.
    """
    combined = [Bs[-1]]
    for B in reversed(Bs[:-1]):
        combined.insert(0, np.dot(combined[0], B))
    return combined


# Rescale each chained B so that the combined-chain norms shift.
# NOTE(review): this multiplies by norm(B)/norm(Bc) and never uses Bd;
# possibly norm(Bd)/norm(Bc) was intended — confirm against the experiment.
for B, Bc, Bd in zip(Bs, combine_Bs(Bs), Bs_direct):
    B *= norm(B) / norm(Bc)

print("B norms: %s" % ", ".join("%0.3f" % norm(B) for B in Bs))
print("Bc norms: %s" % ", ".join("%0.3f" % norm(B) for B in combine_Bs(Bs)))
cost, error, eta=eta, alpha=alpha) bp_learner = BPLearner(get_network(), cost, error, eta=eta, alpha=alpha) # bp_learner.weight_norms = [] # fa_learner = FALearner( # get_network(), cost, error, eta=eta, alpha=alpha) # fa_learner.Bs = [initial_w((j, i), kind='ortho', scale=2) # for i, j in zip(dhids, dhids[1:] + [dout])] # fa_learner.bp_angles = [] fas_learner = FASkipLearner(get_network(), cost, error, eta=eta, alpha=alpha) fas_learner.Bs = [ initial_w((dout, dhid), kind='ortho', scale=2) for dhid in dhids ] # epochs = 1 # epochs = 3 epochs = 10 # epochs = 30 # learners = [shallow_learner, bp_learner, fas_learner] learners = [bp_learner, fas_learner] for learner in learners: learner.train(epochs, batch_fn, test_set=test_subset) # --- plot plt.figure(1)
error, eta=0.5 * eta, momentum=momentum, name='BPLocal') fas_learner = FASkipLearner( get_network(), cost, error, # eta=eta, momentum=momentum, name='FA') eta=0.5 * eta, momentum=momentum, name='FA') # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.2, rng=rng) # genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.15, rng=rng) genB = lambda shape: initial_w( shape, kind='ortho', normkind='rightmean', scale=0.1, rng=rng) fas_learner.Bs = [genB((dout, dhid)) for dhid in dhids] # learners = [bp_learner] # learners = [bpl_learner] # learners = [fas_learner] learners = [bp_learner, bpl_learner, fas_learner] batch_size = 10 batch_fn = make_flat_batch_fn(Xtrain, Ytrain, batch_size) for learner in learners: print("%s: %s" % (learner.name, weight_norm_s(learner.network.weights))) if hasattr(learner, 'Bs'): print("%s B: %s" % (learner.name, weight_norm_s(learner.Bs))) # learner.train(5, batch_fn, test_set=(Xtest, Ytest))
# n_per_batch = 20 n_per_batch = 100 batch_fn = make_flat_batch_fn(trainX, trainY, n_per_batch) learner = FASkipLearner(get_network(), cost, error, eta=eta, alpha=alpha, momentum=momentum) if 0: Bs = [np.zeros((n_out, n_hid)) for n_hid in n_hids] elif 1: Bs = [ initial_w((n_out, n_hid), kind='ortho', scale=0.1) for n_hid in n_hids ] else: scale = 0.1 Bs = [] for h in (h0, h1): hmean, hstd = h.mean(axis=0), h.std(axis=0) hsilent = hstd <= 1e-16 h = (h - hmean) / np.maximum(hstd, 1e-16) # h = (h - hmean) * (scale / np.maximum(hstd, 1e-16)) B = np.dot(trainT.T, h) / trainT.sum(0)[:, None] if hsilent.sum() > 0: Bstd = B[:, ~hsilent].std(axis=0).mean() B[:, hsilent] = rng.normal(scale=Bstd, size=(B.shape[0], hsilent.sum()))
name='Shallow') bp_learner = BPLearner(get_network(), squared_cost, rms_error, eta=eta, alpha=alpha, name='BP') bp_learner.weight_norms = [] fa_learner = FALearner(get_network(), squared_cost, rms_error, eta=eta, alpha=alpha) fa_learner.Bs = [ initial_w((j, i), kind='ortho', scale=2) for i, j in zip(dhids, dhids[1:] + [dout]) ] fa_learner.bp_angles = [] # fa_learner.pbp_angles = [] fas_learner = FASkipLearner(get_network(), squared_cost, rms_error, eta=eta, alpha=alpha, name='Our model') # fas_learner.Bs = [initial_w((dout, dhid), kind='ortho', scale=2) for dhid in dhids] fas_learner.Bs = [ initial_w((dout, dhid), kind='ortho', normkind='rightmean') for dhid in dhids