def run_trial(f_dfs):
    """Train a BP and a DFA learner on one generated regression problem.

    Parameters
    ----------
    f_dfs : iterable of (f, df) pairs
        Nonlinearity/derivative pairs to test; one BP-vs-DFA run per pair.

    Returns
    -------
    list
        One ``[bp_learner, fas_learner]`` pair per entry of ``f_dfs``.

    NOTE(review): relies on module-level names (genX, genY, n, n_per_batch,
    make_flat_batch_fn, initial_weights, initial_w, sizes, rng, dhids, dout,
    eta, alpha, Network, BPLearner, FASkipLearner, squared_cost, rms_error,
    norm) — confirm they are defined before this is called.
    """
    X = genX(n)
    Y = genY(X)
    batch_fn = make_flat_batch_fn(X, Y, n_per_batch)

    # Generated but unused below; kept because genX/genY consume `rng`,
    # so deleting these calls would change the random state for everything
    # that follows.
    Xvalid = genX(1000)
    Yvalid = genY(Xvalid)
    # Yrms = rms(Y, axis=1).mean()
    # Yvalidrms = rms(Yvalid, axis=1).mean()

    # --- initial weights (shared by every (f, df) trial)
    weights = initial_weights(sizes, kind='gaussian', scale=1e-1, rng=rng)
    # print(", ".join("||W%d|| = %0.3f" % (i, norm(w)) for i, w in enumerate(weights)))

    # Fixed random feedback matrices for DFA, one per hidden layer.
    # genB = lambda shape: initial_w(shape, rng=rng, kind='ortho', normkind='rightmean', scale=0.2)
    def genB(shape):  # def instead of assigned lambda (PEP 8 E731)
        return initial_w(
            shape, rng=rng, kind='ortho', normkind='rightmean', scale=0.3)

    directBs = [genB((dout, dhid)) for dhid in dhids]

    def test_derivative(f, df):
        # One BP run and one DFA run starting from identical weights.
        get_network = lambda **kwargs: Network(
            weights, f=f, df=df, biases=None, noise=0, **kwargs)

        bp_learner = BPLearner(get_network(), squared_cost, rms_error,
                               eta=eta, alpha=alpha, name='BP')
        bp_learner.weight_norms = []

        fas_learner = FASkipLearner(get_network(), squared_cost, rms_error,
                                    eta=eta, alpha=alpha, name='DFA')
        fas_learner.Bs = directBs

        learners = [bp_learner, fas_learner]
        for learner in learners:
            learner.train(1, batch_fn)

        # Report per-layer weight norms after training.
        for learner in learners:
            print(", ".join("||W%d|| = %0.3f" % (i, norm(w))
                            for i, w in enumerate(learner.network.weights)))

        return learners

    # Comprehension instead of a manual append loop (same order, same calls).
    return [test_derivative(*f_df) for f_df in f_dfs]
# --- pointer-classification setup (module-level script fragment)
# One "pointer" unit vector per label, sampled on the dout-dim hypersphere.
pointers = UniformHypersphere(surface=True).sample(n_labels, d=dout, rng=rng)

dtp_cheat = False

# `pointers` is bound as a default argument so the cost/error functions
# capture the array at definition time, not at call time.
def cost(y, yinds, pointers=pointers):
    return pointer_squared_cost_on_inds(y, yinds, pointers)

def error(y, yinds, pointers=pointers):
    return pointer_class_error_on_inds(y, yinds, pointers)

din = trainX.shape[1]
# Layer sizes: input dim, hidden layer dims, output dim.
sizes = [din] + dhids + [dout]
batch_fn = make_flat_batch_fn(trainX, trainY, n_per_batch)

# weights = initial_weights(sizes, kind='uniform', scale=0.001, rng=rng)
weights = initial_weights(sizes, kind='ortho', scale=1, rng=rng)

# Static LIF-linear nonlinearity and its derivative.
f, df = static_f_df('liflinear', tau_rc=0.05, amplitude=0.024)

# Learning rate; the commented values record the swept alternatives.
# eta = 1e-1
# eta = 5e-2
# eta = 2e-2
# eta = 1e-2
eta = 4e-3
# eta = 1e-3
# eta = 5e-4
# eta = 2e-4
# eta = 1e-4
alpha = 0 # alpha = 1e-8 # alpha = 1e-6 # Learner = ShallowLearner Learner = BPLearner # --- problem dataset T = orthogonalize(rng.normal(size=(din, dout))) genX = lambda n: rng.normal(scale=1., size=(n, din)) genY = lambda X: np.dot(X, T) X = genX(n) Y = genY(X) batch_fn = make_flat_batch_fn(X, Y, n_per_batch) Xvalid = genX(10000) Yvalid = genY(Xvalid) def arg_string(args): return "{%s}" % ", ".join( "%s: %s" % (k, arg_string(v)) if isinstance(v, dict) else "%s: %0.1e" % (k, v) if is_number(v) else "%s: %s" % (k, v) for k, v in args.items()) def objective(args): sargs = arg_string(args) w_kind = args['w_kind'] w_scale = args['w_scale']
# NOTE(review): this fragment starts mid-call — the line below holds the
# trailing keyword arguments of a learner constructor (presumably the FA
# learner, given name='FA') that begins above the visible region.
    eta=0.5 * eta, momentum=momentum, name='FA')

# Random orthogonal feedback matrices for the FA learner; commented lines
# record swept scale values.
# genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.2, rng=rng)
# genB = lambda shape: initial_w(shape, kind='ortho', normkind='rightmean', scale=0.15, rng=rng)
genB = lambda shape: initial_w(
    shape, kind='ortho', normkind='rightmean', scale=0.1, rng=rng)
fas_learner.Bs = [genB((dout, dhid)) for dhid in dhids]

# learners = [bp_learner]
# learners = [bpl_learner]
# learners = [fas_learner]
learners = [bp_learner, bpl_learner, fas_learner]

batch_size = 10
batch_fn = make_flat_batch_fn(Xtrain, Ytrain, batch_size)

for learner in learners:
    # Report initial weight (and feedback-matrix) norms, then train.
    print("%s: %s" % (learner.name, weight_norm_s(learner.network.weights)))
    if hasattr(learner, 'Bs'):
        print("%s B: %s" % (learner.name, weight_norm_s(learner.Bs)))

    # Commented calls record previously-tried epoch counts.
    # learner.train(5, batch_fn, test_set=(Xtest, Ytest))
    # learner.train(10, batch_fn, test_set=(Xtest, Ytest))
    # learner.train(15, batch_fn, test_set=(Xtest, Ytest))
    # learner.train(25, batch_fn, test_set=(Xtest, Ytest))
    # learner.train(50, batch_fn, test_set=(Xtest, Ytest))
    # NOTE(review): loop extent reconstructed from flattened source — the
    # train call plausibly sits inside this per-learner loop; confirm.
    learner.train(epochs, batch_fn, test_set=(Xtest, Ytest))

    # print(learner.test((Xtest, Ytest)).mean())
    # print(np.round(learner.network.weights[0].T, 3))
solver2 = hunse_thesis.solvers.SoftmaxSGD(n_epochs=2, reg=reg, eta=eta, momentum=momentum, batch_size=batch_size, verbose=1) # solver = hunse_thesis.solvers.SoftmaxSGD( # reg=0.001, n_epochs=1, eta=1e-4, momentum=0.9, batch_size=100, verbose=1) # W1 = solver(h1[:100], trainT[:100], X=weights[-1]) W1, _ = solver2(h1, trainT, X=weights[-1]) train_out2 = np.dot(h1, W1) train_cost2 = cost(train_out2, trainY)[0].mean() print(train_cost2) batch_fn = make_flat_batch_fn(trainX, trainY, batch_size) learner = ShallowLearner(get_network(), cost, error, eta=eta, alpha=alpha, momentum=momentum) learner.train(2, batch_fn, test_set=test) train_out3 = learner.network.predict(trainX) train_cost3 = cost(train_out3, trainY)[0].mean() train_error3 = error(train_out3, trainY).mean() print((train_cost3, train_error3)) assert 0