def test_pbgn(seed, reuse_block_inds):
    """
    Test the generalised Newton's method for optimisation, using parallel
    block-diagonal approximations.

    TODO: combine this and the gradient-descent test (and any optimisers
    implemented in future, EG Adam, PSO) into a single parametrised test
    """
    # Set the random seed
    np.random.seed(seed)
    # Generate random number of iterations, network, data, and results file
    n_iters = np.random.randint(10, 20)
    n = NeuralNetwork(
        input_dim=1,
        output_dim=1,
        num_hidden_units=[4, 8, 6],
        act_funcs=[activations.gaussian, activations.identity])
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)
    name = "Test PBGN without line-search, reuse_block_inds={}".format(
        reuse_block_inds)
    results_filename = "{}.txt".format(name)
    results_path = os.path.join(output_dir, results_filename)
    results_file = open(results_path, "w")
    result = optimisers.Result(name=name, verbose=True, file=results_file)
    # Call the generalised Newton function
    result_ls = optimisers.generalised_newton(
        n,
        sin_data,
        terminator=optimisers.Terminator(i_lim=n_iters),
        evaluator=optimisers.Evaluator(i_interval=1),
        result=result,
        reuse_block_inds=reuse_block_inds)
    results_file.close()
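
# A possible shape for the single parametrised test described in the TODO
# above: a minimal sketch, assuming pytest is available and that
# optimisers.gradient_descent accepts the same core keyword arguments as
# optimisers.generalised_newton. The optimiser table, seeds, and iteration
# counts below are illustrative, not this repo's actual test.
import pytest

@pytest.mark.parametrize("seed", [0, 1, 2])
@pytest.mark.parametrize("optimiser_func, optimiser_kwargs", [
    (optimisers.gradient_descent,   dict()),
    (optimisers.generalised_newton, dict(reuse_block_inds=True)),
    (optimisers.generalised_newton, dict(reuse_block_inds=False)),
])
def test_optimiser(seed, optimiser_func, optimiser_kwargs):
    # Set the random seed, then generate a random network and data set
    np.random.seed(seed)
    n = NeuralNetwork(
        input_dim=1,
        output_dim=1,
        num_hidden_units=[4, 8, 6],
        act_funcs=[activations.gaussian, activations.identity])
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)
    # Run the optimiser under test for a random number of iterations
    optimiser_func(
        n,
        sin_data,
        terminator=optimisers.Terminator(i_lim=np.random.randint(10, 20)),
        evaluator=optimisers.Evaluator(i_interval=1),
        **optimiser_kwargs)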
def make_smudge_plots(objective):
    smudge_plot(objective, lambda f, x: optimisers.gradient_descent(
        f, x, n_iters=1, line_search_flag=False, learning_rate=2e-1
        ), name="SGD smudge plot")
    smudge_plot(objective, lambda f, x: optimisers.gradient_descent(
        f, x, n_iters=1, line_search_flag=True, beta=0.99, alpha=0.2
        # ), name="SGD+LS smudge plot", n_lines=3)
        ), name="SGD+LS smudge plot", nx0_sm=6, nx1_sm=6, n_lines=2)
    smudge_plot(objective, lambda f, x: optimisers.generalised_newton(
        f, x, n_iters=1, line_search_flag=False, learning_rate=0
        ), name="GN smudge plot")
    smudge_plot(objective, lambda f, x: optimisers.rectified_newton(
        f, x, n_iters=1, line_search_flag=False, learning_rate=0
        ), name="RN smudge plot")
def run_experiment(
        dataset,
        num_units,
        num_layers,
        log10_learning_rate,
        max_block_size,
        log10_max_step,
        reuse_block_inds,
        act_func):
    n = NeuralNetwork(
        input_dim=1,
        output_dim=1,
        num_hidden_units=[num_units for _ in range(num_layers)],
        act_funcs=[act_func, activations.Identity()])
    result = optimisers.generalised_newton(
        n,
        dataset,
        learning_rate=pow(10, log10_learning_rate),
        max_block_size=max_block_size,
        max_step=pow(10, log10_max_step),
        reuse_block_inds=reuse_block_inds,
        terminator=optimisers.Terminator(t_lim=3),
        evaluator=optimisers.Evaluator(t_interval=0.1),
        line_search=None)
    return result
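
# Illustrative invocation of run_experiment: the hyperparameter values below
# are assumptions chosen only to show the argument types, and the data set
# mirrors the one used elsewhere in this repo. Note that the learning rate
# and maximum step are passed on a log10 scale, which suits random or grid
# searches spanning several orders of magnitude.
if __name__ == "__main__":
    example_result = run_experiment(
        dataset=data.Sinusoidal(input_dim=1, output_dim=1, freq=1),
        num_units=10,
        num_layers=2,
        log10_learning_rate=-1,     # learning rate of 0.1
        max_block_size=7,
        log10_max_step=0,           # maximum step size of 1
        reuse_block_inds=True,
        act_func=activations.gaussian)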
def compare_optimisers_dimensional_efficiency():
    # Note: use the built-in int here; np.int is removed in recent NumPy
    n_dims_list = np.unique(np.logspace(0, 3, 10, dtype=int))
    optimiser_list = [
        lambda f, x, f_lim: optimisers.gradient_descent(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5),
        lambda f, x, f_lim: optimisers.generalised_newton(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5),
        lambda f, x, f_lim: optimisers.block_generalised_newton(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5),
        lambda f, x, f_lim: optimisers.parallel_block_generalised_newton(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5, block_size=3),
        lambda f, x, f_lim: optimisers.rectified_newton(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5),
    ]
    plot_dimensional_efficiency(
        optimiser_list,
        objectives.Gaussian,
        n_dims_list,
        distance_ratio=3,
        name="Dimensional efficiency of different optimisers")
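
# The five lambdas above repeat the same keyword arguments; one possible
# refactor (a sketch, not this repo's code) is a small factory function that
# binds the shared kwargs while preserving the (f, x, f_lim) calling
# convention that plot_dimensional_efficiency expects:
def _make_optimiser(optimiser_func, **extra_kwargs):
    # Bind the shared keyword arguments, plus any optimiser-specific extras
    return lambda f, x, f_lim: optimiser_func(
        f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf, t_lim=5,
        **extra_kwargs)

# Equivalent to the optimiser_list defined above:
equivalent_optimiser_list = [
    _make_optimiser(optimisers.gradient_descent),
    _make_optimiser(optimisers.generalised_newton),
    _make_optimiser(optimisers.block_generalised_newton),
    _make_optimiser(optimisers.parallel_block_generalised_newton,
        block_size=3),
    _make_optimiser(optimisers.rectified_newton),
]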
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - n_train: positive integer number of points in the training set
    - batch_size: positive integer batch size to use for training
    - t_lim: positive float, length of time to train for each experiment
    - num_hidden_units: list of positive integers, number of hidden units in
      each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    - e_lims: list of 2 floats, used as axis limits in the output plots
    - n_repeats: positive integer number of repeats to perform of each
      experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()
    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50
    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network and store initial parameters
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ])
        w0 = n.get_parameter_vector().copy()
        # Call gradient descent function with line search
        result_gd_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="SGD with line search",
                verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_ls)
        # Try again without line search
        n.set_parameter_vector(w0)
        result_gd_no_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="SGD without line search",
                verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_no_ls)
        # Call generalised Newton function with line search
        n.set_parameter_vector(w0)
        result_pbgn_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="PBGN with line search",
                verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_ls)
        # Try again without line search
        n.set_parameter_vector(w0)
        result_pbgn_no_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="PBGN without line search",
                verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_no_ls)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")
    # Compare training curves
    plot_name = "Comparing gradient descent vs generalised Newton"
    plot_name += ", %iD-%iD data" % (input_dim, output_dim)
    plot_name += ", %.2g s training time" % t_lim
    plot_name += ", %s hidden units" % str(num_hidden_units)
    plotting.plot_training_curves(
        results_list,
        plot_name,
        output_dir,
        e_lims=e_lims)
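
# Illustrative invocation of main, with small values chosen for a quick run;
# these numbers are examples only, not the script's defaults (which
# presumably live in the module's argument parsing, outside this section).
if __name__ == "__main__":
    main(
        input_dim=1,
        output_dim=1,
        n_train=100,
        batch_size=50,
        t_lim=5.0,
        num_hidden_units=[10],
        e_lims=[0.0, 0.5],
        n_repeats=3)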