def test_step_size_column():
    """
    Run gradient descent with a line search while a StepSize column, built
    from that same line search, is attached to a Result object which writes
    to a text file in the output directory
    """
    set_random_seed_from_args("test_step_size_column")
    num_train_points = np.random.randint(10, 20)
    num_iterations = np.random.randint(10, 20)
    network = get_random_network(input_dim=1, output_dim=1)
    dataset = data.Sinusoidal(
        input_dim=1,
        output_dim=1,
        n_train=num_train_points,
    )
    output_path = os.path.join(output_dir, "test_step_size_column.txt")
    with open(output_path, "w") as output_file:
        line_search = optimisers.LineSearch()
        result = optimisers.Result(
            name="Test line search column",
            file=output_file,
            add_default_columns=True,
        )
        # The column and the optimiser share one LineSearch instance
        step_size_column = optimisers.results.columns.StepSize(line_search)
        result.add_column(step_size_column)
        iteration_limit = optimisers.Terminator(i_lim=num_iterations)
        evaluation_schedule = optimisers.Evaluator(i_interval=1)
        optimisers.gradient_descent(
            network,
            dataset,
            result=result,
            terminator=iteration_limit,
            evaluator=evaluation_schedule,
            line_search=line_search,
        )
def test_plot_optimal_batch_sizes():
    """
    Optimise a small model with an OptimalBatchSize column attached to the
    Result object, then pass the result and the column to
    plotting.plot_optimal_batch_sizes
    """
    # Fixed seed and small problem sizes
    np.random.seed(102)
    num_train_points = 10
    num_iterations = 2
    network = get_random_network(input_dim=1, output_dim=1)
    dataset = data.Sinusoidal(
        input_dim=1,
        output_dim=1,
        n_train=num_train_points,
    )

    # Result, optimiser, and the column under test
    result = optimisers.Result(verbose=False)
    optimiser = optimisers.GradientDescent(optimisers.LineSearch())
    batch_size_column = optimisers.results.columns.OptimalBatchSize(
        optimiser,
        dataset.train.n,
        n_repeats=3,
        n_batch_sizes=3,
        min_batch_size=2,
    )
    result.add_column(batch_size_column)

    # Run the optimisation
    iteration_limit = optimisers.Terminator(i_lim=num_iterations)
    evaluation_schedule = optimisers.Evaluator(i_interval=1)
    optimiser.optimise(
        network,
        dataset,
        result=result,
        terminator=iteration_limit,
        evaluator=evaluation_schedule,
    )

    # Plot from the values collected by the column
    plotting.plot_optimal_batch_sizes(
        "Test plot_optimal_batch_sizes",
        output_dir,
        result,
        batch_size_column,
    )
def test_plot_result_attribute():
    """
    Build several Result objects by hand, each with a StepSize and an
    Iteration column, fill them with random step sizes, and plot the step
    size column of all results with plotting.plot_result_attribute
    """
    np.random.seed(1521)
    num_updates = np.random.randint(10, 20)
    collected_results = []
    for loop_index in range(5):
        # Indices above 2 are clamped, so the last three results share the
        # same name, output file and step size offset
        group = min(loop_index, 2)
        result_name = "test_plot_result_attribute_%i" % group
        text_path = os.path.join(output_dir, result_name + ".txt")
        with open(text_path, "w") as text_file:
            result = optimisers.Result(
                name=result_name,
                file=text_file,
                add_default_columns=False,
            )
            ls = optimisers.LineSearch()
            ls_column = optimisers.results.columns.StepSize(ls)
            result.add_column(ls_column)
            result.add_column(optimisers.results.columns.Iteration())
            result.begin()
            for update_index in range(num_updates):
                # Set the step size directly instead of running a line search
                ls.s = np.random.uniform() + group
                result.update(iteration=update_index)

            collected_results.append(result)

    # The column type from the final loop iteration selects what to plot
    plotting.plot_result_attribute(
        "test_plot_result_attribute_linesearch",
        output_dir,
        collected_results,
        attribute=type(ls_column),
        marker="o",
        line_style="",
    )
def test_plot_predictions_gif(dataset_type):
    """
    Train a model for a couple of iterations on a regression or
    classification dataset while a Predictions column records its
    predictions, then make a gif from them with plotting.plot_predictions_gif

    dataset_type must be a subclass of data.Regression or
    data.Classification, otherwise ValueError is raised
    """
    # Seed depends on both the test name and the dataset type
    set_random_seed_from_args("test_plot_predictions_gif", dataset_type)
    num_train_points = np.random.randint(10, 20)
    num_pred_points = np.random.randint(5, 10)
    num_iterations = 2

    # Choose dimensions and extra dataset arguments for the dataset family
    if issubclass(dataset_type, data.Regression):
        input_dim, output_dim = 1, 1
        dataset_kwargs = {"x_lo": -1, "x_hi": 1, "freq": 1}
    elif issubclass(dataset_type, data.Classification):
        input_dim, output_dim = 2, 3
        dataset_kwargs = {}
    else:
        raise ValueError(
            "dataset_type %r must be a subclass of data.Regression or "
            "data.Classification" % dataset_type)

    dataset = dataset_type(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=num_train_points,
        **dataset_kwargs,
    )
    initialiser = models.initialisers.ConstantPreActivationStatistics(
        x_train=dataset.train.x,
        y_train=dataset.train.y,
    )
    network = get_random_network(
        input_dim=input_dim,
        output_dim=output_dim,
        initialiser=initialiser,
    )

    # Result object with the Predictions column attached
    result = optimisers.Result(verbose=False)
    predictions_column = optimisers.results.columns.Predictions(
        dataset,
        num_pred_points,
    )
    result.add_column(predictions_column)

    # Run the optimisation
    iteration_limit = optimisers.Terminator(i_lim=num_iterations)
    evaluation_schedule = optimisers.Evaluator(i_interval=1)
    line_search = optimisers.LineSearch()
    optimisers.gradient_descent(
        network,
        dataset,
        result=result,
        terminator=iteration_limit,
        evaluator=evaluation_schedule,
        line_search=line_search,
    )

    # The plot name doubles as the name of the output sub-directory
    gif_name = (
        "test_plot_predictions_gif, dataset_type = %s"
        % dataset_type.__name__
    )
    plotting.plot_predictions_gif(
        plot_name=gif_name,
        dir_name=os.path.join(output_dir, gif_name),
        result=result,
        prediction_column=predictions_column,
        dataset=dataset,
        output_dim=output_dim,
        duration=1000,
    )
def run_experiment(
    num_units,
    num_layers,
    log10_s0,
    alpha,
    beta,
    act_func,
    max_steps,
    batch_size,
    batch_replace,
    plot_preds=False,
):
    """
    Train one network with gradient descent using the given hyperparameters,
    and return the final recorded test error.

    The network has num_layers hidden layers of num_units units each. The
    line search is configured with s0 = 10 ** log10_s0, alpha, beta, and
    max_its = max_steps. The whole training set is used as the batch when
    batch_size is None or at least args.n_train, otherwise batches of size
    batch_size are used (batch_replace is forwarded to the batch getter). If
    plot_preds is True, the final model predictions are plotted.

    Reads the names args, sin_data and output_dir from the enclosing scope.
    """
    # Build the network
    hidden_layer_sizes = [num_units] * num_layers
    model = models.NeuralNetwork(
        input_dim=args.input_dim,
        output_dim=args.output_dim,
        num_hidden_units=hidden_layer_sizes,
        act_funcs=[act_func, models.activations.identity],
    )

    # Choose how batches are drawn from the training set
    use_full_training_set = (
        (batch_size is None) or (batch_size >= args.n_train)
    )
    if use_full_training_set:
        batch_getter = optimisers.batch.FullTrainingSet()
    else:
        batch_getter = optimisers.batch.ConstantBatchSize(
            batch_size,
            batch_replace,
        )

    # Run gradient descent with time-based termination and evaluation
    time_limit = optimisers.Terminator(t_lim=args.t_lim)
    evaluation_schedule = optimisers.Evaluator(t_interval=args.t_eval)
    line_search = optimisers.LineSearch(
        s0=10 ** log10_s0,
        alpha=alpha,
        beta=beta,
        max_its=max_steps,
    )
    result = optimisers.gradient_descent(
        model,
        sin_data,
        terminator=time_limit,
        evaluator=evaluation_schedule,
        line_search=line_search,
        batch_getter=batch_getter,
    )

    # Optionally plot the final model predictions
    if plot_preds:
        print("Plotting final predictions...")
        plotting.plot_data_predictions(
            plot_name="Final predictions",
            dir_name=output_dir,
            dataset=sin_data,
            output_dim=args.output_dim,
            model=model,
        )

    # The last entry of the test error column is the final test error
    test_error_values = result.get_values(
        optimisers.results.columns.TestError
    )
    return test_error_values[-1]
def __init__(
    self,
    network,
    regulariser,
    primary_initialisation_task,
    secondary_initialisation_task,
    batch_size=50,
    t_lim=None,
):
    """
    Initialise a dinosaur object.

    Stores the network, regulariser and batch size, creates the evaluator,
    result, line search, optimiser, optional timer and dynamic terminator,
    then calls fast_adapt on the two initialisation tasks in turn and uses
    the two resulting parameter vectors (together with the value returned by
    the second fast_adapt call) to update the regulariser.

    Inputs:
    -   network: model to be adapted; must provide get_parameter_vector, and
        set_regulariser when the regulariser is not an Eve instance
    -   regulariser: object with an update method. If it is an instance of
        Eve it replaces the gradient descent optimiser, otherwise it is
        attached to the network with set_regulariser
    -   primary_initialisation_task: first task passed to fast_adapt, also
        used as the dataset of the dynamic terminator
    -   secondary_initialisation_task: second task passed to fast_adapt
    -   batch_size: batch size passed to the dynamic terminator
    -   t_lim: if not None, used as the limit of a Timer (started
        immediately) and passed to the dynamic terminator
    """
    self._network = network
    self._regulariser = regulariser
    self._batch_size = batch_size
    self._evaluator = optimisers.Evaluator(t_interval=0.1)
    self._result = optimisers.Result(name="Dinosaur")
    # Stays False until the regulariser has been updated at the end of this
    # method
    self._initialised_regulariser = False

    # The same line search is shared by the step size column and optimiser
    self._line_search = optimisers.LineSearch()
    self._result.add_column(
        optimisers.results.columns.StepSize(self._line_search))
    self._optimiser = optimisers.GradientDescent(self._line_search)

    # Only create (and start) a timer when a time limit is given
    if t_lim is not None:
        self._timer = optimisers.Timer(t_lim)
        self._timer.begin()
    else:
        self._timer = None

    self._terminator = optimisers.DynamicTerminator(
        model=self._network,
        dataset=primary_initialisation_task,
        batch_size=self._batch_size,
        replace=True,
        t_lim=t_lim,
    )
    self._result.add_column(
        optimisers.results.columns.BatchImprovementProbability(
            self._terminator))

    # Get parameters from optimising the first initialisation task
    # NOTE(review): _reset_terminator is False only for this first call;
    # presumably fast_adapt reads the flag - confirm against fast_adapt
    self._reset_terminator = False
    self.fast_adapt(primary_initialisation_task)
    self._reset_terminator = True
    # Copied before the second adaptation; presumably the network's
    # parameter vector is modified in place - TODO confirm
    w1 = network.get_parameter_vector().copy()

    # Get parameters from optimising the second initialisation task
    dE = self.fast_adapt(secondary_initialisation_task)
    w2 = network.get_parameter_vector()

    # Set the regulariser parameters and add to the network
    self._regulariser.update([w1, w2], [dE])
    # NOTE(review): the columns below are reached through optimisers.columns,
    # whereas the columns above use optimisers.results.columns - confirm that
    # both paths exist
    if isinstance(self._regulariser, Eve):
        # An Eve regulariser takes over the role of the optimiser
        self._optimiser = self._regulariser
        self._optimiser.set_line_search(self._line_search)
        eve_column = optimisers.columns.EveConvergence(self._regulariser)
        self._result.add_column(eve_column)
    else:
        self._network.set_regulariser(self._regulariser)
        self._result.add_column(optimisers.columns.RegularisationError())

    self._initialised_regulariser = True
def test_plot_result_attribute_subplots():
    """
    Train several models, each recording the default columns plus a StepSize
    and a DbsMetric column, and plot the chosen columns of all results with
    plotting.plot_result_attributes_subplots, one attribute per entry in the
    attribute list
    """
    np.random.seed(1521)
    num_iterations = np.random.randint(10, 20)
    num_train_points = np.random.randint(10, 20)
    dataset = data.Sinusoidal(
        input_dim=1,
        output_dim=1,
        n_train=num_train_points,
    )
    columns = optimisers.results.columns
    collected_results = []
    for repeat in range(5):
        network = models.NeuralNetwork(input_dim=1, output_dim=1)
        # Gradient is computed once before optimisation, return value unused
        network.get_gradient_vector(dataset.train.x, dataset.train.y)
        result_name = "test_plot_result_attribute_subplots_%i" % (repeat + 1)
        text_path = os.path.join(output_dir, result_name + ".txt")
        with open(text_path, "w") as text_file:
            result = optimisers.Result(name=result_name, file=text_file)
            ls = optimisers.LineSearch()
            ls_column = columns.StepSize(ls)
            dbs_metric_column = columns.DbsMetric()
            result.add_column(ls_column)
            result.add_column(dbs_metric_column)
            iteration_limit = optimisers.Terminator(i_lim=num_iterations)
            evaluation_schedule = optimisers.Evaluator(i_interval=1)
            optimisers.gradient_descent(
                network,
                dataset,
                result=result,
                line_search=ls,
                terminator=iteration_limit,
                evaluator=evaluation_schedule,
            )

            collected_results.append(result)

    # Column types to plot, and the subset to draw on logarithmic axes
    step_size_type = type(ls_column)
    dbs_metric_type = type(dbs_metric_column)
    attribute_list = [
        columns.TrainError,
        columns.TestError,
        columns.Time,
        step_size_type,
        dbs_metric_type,
    ]
    log_axes_attributes = {
        columns.TrainError,
        columns.TestError,
        step_size_type,
    }
    plotting.plot_result_attributes_subplots(
        "test_plot_result_attribute_subplots",
        output_dir,
        collected_results,
        attribute_list,
        marker="o",
        line_style="--",
        log_axes_attributes=log_axes_attributes,
    )
def test_optimal_batch_size_column():
    """
    Optimise a model with an OptimalBatchSize column attached to the Result
    object, then check the sizes of the nested containers that the column
    has filled in
    """
    # Seed, problem sizes, model and data
    set_random_seed_from_args("test_optimal_batch_size_column")
    num_train_points = np.random.randint(10, 20)
    num_iterations = np.random.randint(10, 20)
    network = get_random_network(input_dim=1, output_dim=1)
    dataset = data.Sinusoidal(
        input_dim=1,
        output_dim=1,
        n_train=num_train_points,
    )

    output_path = os.path.join(
        output_dir,
        "test_optimal_batch_size_column.txt",
    )
    with open(output_path, "w") as output_file:
        result = optimisers.Result(
            name="Test optimal batch size column",
            file=output_file,
            add_default_columns=True,
        )

        # Column under test, driven by the same optimiser that trains
        num_batch_sizes = np.random.randint(3, 6)
        num_repeats = np.random.randint(3, 6)
        optimiser = optimisers.GradientDescent(optimisers.LineSearch())
        batch_size_column = optimisers.results.columns.OptimalBatchSize(
            optimiser,
            dataset.train.n,
            n_repeats=num_repeats,
            n_batch_sizes=num_batch_sizes,
        )
        result.add_column(batch_size_column)

        # Run the optimisation
        iteration_limit = optimisers.Terminator(i_lim=num_iterations)
        evaluation_schedule = optimisers.Evaluator(i_interval=1)
        optimiser.optimise(
            network,
            dataset,
            result=result,
            terminator=iteration_limit,
            evaluator=evaluation_schedule,
        )

        # One entry per evaluation: the iteration limit plus one
        tested_batch_sizes = set(batch_size_column.batch_size_list)
        all_reductions = batch_size_column.reduction_dict_dict
        assert len(all_reductions) == (num_iterations + 1)
        for reductions_by_batch_size in all_reductions.values():
            # One entry per batch size, keyed by the batch sizes tested
            assert len(reductions_by_batch_size) == num_batch_sizes
            assert set(reductions_by_batch_size) == tested_batch_sizes
            for repeated_reductions in reductions_by_batch_size.values():
                # One value per repeat
                assert len(repeated_reductions) == num_repeats
def test_plot_hidden_outputs_gif():
    """
    Train a model for a couple of iterations while a Predictions column,
    created with store_hidden_layer_outputs=True, is attached to the Result
    object, then make a gif with plotting.plot_hidden_outputs_gif
    """
    # Seed, problem sizes, data and model
    set_random_seed_from_args("test_plot_hidden_outputs_gif")
    num_train_points = np.random.randint(10, 20)
    num_pred_points = np.random.randint(5, 10)
    num_iterations = 2
    dataset = data.Sinusoidal(
        input_dim=1,
        output_dim=1,
        n_train=num_train_points,
    )
    initialiser = models.initialisers.ConstantPreActivationStatistics(
        x_train=dataset.train.x,
        y_train=dataset.train.y,
    )
    network = get_random_network(
        input_dim=1,
        output_dim=1,
        initialiser=initialiser,
    )

    # Result object with the Predictions column attached
    result = optimisers.Result(verbose=False)
    predictions_column = optimisers.results.columns.Predictions(
        dataset,
        num_pred_points,
        store_hidden_layer_outputs=True,
    )
    result.add_column(predictions_column)

    # Run the optimisation
    iteration_limit = optimisers.Terminator(i_lim=num_iterations)
    evaluation_schedule = optimisers.Evaluator(i_interval=1)
    line_search = optimisers.LineSearch()
    optimisers.gradient_descent(
        network,
        dataset,
        result=result,
        terminator=iteration_limit,
        evaluator=evaluation_schedule,
        line_search=line_search,
    )

    # The plot name doubles as the name of the output sub-directory
    gif_name = "test_plot_hidden_outputs_gif"
    plotting.plot_hidden_outputs_gif(
        plot_name=gif_name,
        dir_name=os.path.join(output_dir, gif_name),
        result=result,
        prediction_column=predictions_column,
        dataset=dataset,
        output_dim=1,
        duration=1000,
    )
def test_batch_improvement_probability_column(smooth_output):
    """
    Run gradient descent with a DynamicTerminator acting as both terminator
    and batch getter, while a BatchImprovementProbability column built from
    that terminator is attached to the Result object
    """
    set_random_seed_from_args(
        "test_batch_improvement_probability_column",
        smooth_output,
    )
    num_train_points = np.random.randint(10, 20)
    num_iterations = np.random.randint(50, 100)
    network = get_random_network(input_dim=1, output_dim=1)
    dataset = data.Sinusoidal(
        input_dim=1,
        output_dim=1,
        n_train=num_train_points,
    )
    batch_size = np.random.randint(3, dataset.train.n)

    # The test name is also used as the stem of the output file name
    test_name = (
        "test_batch_improvement_probability_column, smooth_output=%s"
        % smooth_output
    )
    output_path = os.path.join(output_dir, "%s.txt" % test_name)
    with open(output_path, "w") as output_file:
        line_search = optimisers.LineSearch()
        result = optimisers.Result(
            name=test_name,
            file=output_file,
            add_default_columns=True,
        )
        dynamic_terminator = optimisers.DynamicTerminator(
            model=network,
            dataset=dataset,
            batch_size=batch_size,
            smooth_output=smooth_output,
            i_lim=num_iterations,
        )
        probability_column = (
            optimisers.results.columns.BatchImprovementProbability(
                dynamic_terminator,
            )
        )
        result.add_column(probability_column)
        evaluation_schedule = optimisers.Evaluator(i_interval=1)
        # The same object decides when to stop and supplies the batches
        optimisers.gradient_descent(
            network,
            dataset,
            result=result,
            terminator=dynamic_terminator,
            batch_getter=dynamic_terminator,
            evaluator=evaluation_schedule,
            line_search=line_search,
        )
def test_test_set_improvement_probability_simple_column(
    smoother_type,
    use_cdf,
):
    """
    Run gradient descent while a TestSetImprovementProbabilitySimple column
    is attached to the Result object. smoother_type is either None, or a
    callable which is called with the argument 0 to make the smoother
    """
    # The seed depends only on the test name, not on the test arguments
    set_random_seed_from_args(
        "test_test_set_improvement_probability_simple_column",
    )
    num_train_points = np.random.randint(10, 20)
    num_iterations = np.random.randint(50, 100)
    network = get_random_network(input_dim=1, output_dim=1)
    dataset = data.Sinusoidal(
        input_dim=1,
        output_dim=1,
        n_train=num_train_points,
    )

    # The test name is also used as the stem of the output file name
    if smoother_type is None:
        smoother_name = None
    else:
        smoother_name = smoother_type.__name__
    test_name = (
        "test_test_set_improvement_probability_simple_column, "
        "use_cdf=%s, smoother_type=%s"
        % (use_cdf, smoother_name)
    )
    output_path = os.path.join(output_dir, "%s.txt" % test_name)
    with open(output_path, "w") as output_file:
        line_search = optimisers.LineSearch()
        result = optimisers.Result(
            name=test_name,
            file=output_file,
            add_default_columns=True,
        )
        if smoother_type is None:
            smoother = None
        else:
            smoother = smoother_type(0)
        columns = optimisers.results.columns
        probability_column = columns.TestSetImprovementProbabilitySimple(
            model=network,
            dataset=dataset,
            smoother=smoother,
            use_cdf=use_cdf,
        )
        result.add_column(probability_column)
        iteration_limit = optimisers.Terminator(i_lim=num_iterations)
        evaluation_schedule = optimisers.Evaluator(i_interval=1)
        optimisers.gradient_descent(
            network,
            dataset,
            result=result,
            terminator=iteration_limit,
            evaluator=evaluation_schedule,
            line_search=line_search,
        )
def test_plot_error_reductions_vs_batch_size_gif():
    """
    Optimise a small model with an OptimalBatchSize column attached to the
    Result object, then pass the result and the column to
    plotting.plot_error_reductions_vs_batch_size_gif to make a gif in a
    sub-directory of the output directory
    """
    # Fixed seed and small problem sizes
    np.random.seed(102)
    num_train_points = 10
    num_iterations = 2
    network = get_random_network(input_dim=1, output_dim=1)
    dataset = data.Sinusoidal(
        input_dim=1,
        output_dim=1,
        n_train=num_train_points,
    )

    # Result, optimiser, and the column which collects the statistics
    result = optimisers.Result(verbose=False)
    optimiser = optimisers.GradientDescent(optimisers.LineSearch())
    batch_size_column = optimisers.results.columns.OptimalBatchSize(
        optimiser,
        dataset.train.n,
        n_repeats=3,
        n_batch_sizes=3,
        min_batch_size=2,
    )
    result.add_column(batch_size_column)

    # Run the optimisation
    iteration_limit = optimisers.Terminator(i_lim=num_iterations)
    evaluation_schedule = optimisers.Evaluator(i_interval=1)
    optimiser.optimise(
        network,
        dataset,
        result=result,
        terminator=iteration_limit,
        evaluator=evaluation_schedule,
    )

    # Make the gif
    gif_dir = os.path.join(
        output_dir,
        "Test plot_error_reductions_vs_batch_size_gif",
    )
    plotting.plot_error_reductions_vs_batch_size_gif(
        result,
        batch_size_column,
        gif_dir,
        loop=None,
    )
def test_gradient_descent_line_search(seed):
    """
    Test gradient descent, using a line-search. A line-search should
    guarantee that each iteration reduces the error function, so this is
    tested using assert statements after calling the gradient_descent
    function.

    Inputs:
    -   seed: integer used to seed numpy's random number generator, and
        included in the name of the results file
    """
    # Set the random seed
    np.random.seed(seed)

    # Generate random number of iterations, network, data, and results file
    n_iters = np.random.randint(10, 20)
    n = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)
    results_filename = (
        "Test gradient descent with line-search, seed = %i.txt" % seed)
    results_path = os.path.join(output_dir, results_filename)

    # Use a context manager so that the results file is closed even if one of
    # the assertions below fails
    with open(results_path, "w") as results_file:
        result = optimisers.Result(
            name="SGD with line search",
            verbose=True,
            file=results_file,
        )

        # Add step size column to result
        ls = optimisers.LineSearch(max_its=int(1e10))
        result.add_column(optimisers.results.columns.StepSize(ls))

        # Call gradient descent function (values are read from result, so the
        # return value is not needed)
        optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(i_lim=n_iters),
            evaluator=optimisers.Evaluator(i_interval=1),
            line_search=ls,
            result=result,
        )

        # Make sure each iteration reduces the training error
        train_error_list = result.get_values(
            optimisers.results.columns.TrainError
        )
        consecutive_errors = zip(train_error_list, train_error_list[1:])
        for previous_error, next_error in consecutive_errors:
            assert next_error < previous_error
help="If this argument is included, apply moving-average smoothing " "filters to the intermediate stages of the dynamic terminator " "calculations (if a dynamic terminator is being used), specifically " "the reduction in mean batch error between iterations, and the " "standard devation of batch error", action="store_true", ) # Parse arguments args = parser.parse_args() args.num_hidden_units = [ int(i) for i in args.num_hidden_units_str.split(",") ] args.line_search = None if args.no_line_search else optimisers.LineSearch() args.t_eval = args.t_lim / 50 if args.t_eval is None else args.t_eval args.dataset_type = data.dataset_class_dict[args.dataset_type_str] if args.error_lims is not None: float_fmt = lambda e: -float(e[1:]) if e.startswith("n") else float(e) args.error_lims = [float_fmt(e) for e in args.error_lims.split(",")] error_msg = "Must provide 2 comma-separated values for error_lims" assert len(args.error_lims) == 2, error_msg # Call main function using command-line arguments t_start = time.perf_counter() main(args) print("Main function run in %.3f s" % (time.perf_counter() - t_start))
def main(args):
    """
    Train a Dinosaur meta-learning model and save plots of the results.

    Seeds numpy's random number generator, builds the network, creates an
    empty output directory whose name depends on the script parameters,
    loads either MNIST or synthetic tasks, performs 10 outer-loop iterations
    of meta-learning, and then plots training curves, adaptations to each
    task, and adaptations to the out-of-distribution task with and without
    regularisation.

    Inputs:
    -   args: object with attributes seed, num_hidden_units,
        use_mnist_data, regulariser and error_scale_coefficient, and (when
        use_mnist_data is true) mnist_num_train_tasks,
        mnist_train_distribution_label and mnist_out_of_distribution_label
    """
    np.random.seed(args.seed)

    # Initialise network model
    network = models.NeuralNetwork(
        input_dim=INPUT_DIM,
        output_dim=OUTPUT_DIM,
        num_hidden_units=args.num_hidden_units,
    )

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "mnist" if args.use_mnist_data else "synthetic",
        "r%s" % args.regulariser,
        "e%s" % args.error_scale_coefficient,
        "u%s" % args.num_hidden_units,
    ])
    if args.use_mnist_data:
        param_str += " " + " ".join([
            "t%s" % args.mnist_num_train_tasks,
            "l%s" % args.mnist_train_distribution_label,
            "o%s" % args.mnist_out_of_distribution_label,
        ])
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train Dinosaur",
        param_str,
    )
    # Any existing output directory for these parameters is deleted first
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    print("Saving output plots in \"%s\"" % output_dir)
    # NOTE(review): "explorer" is presumably the Windows file browser, and
    # the command is built by interpolating the directory name into a shell
    # string - confirm this is acceptable on the platforms in use
    os.system("explorer \"%s\"" % output_dir)

    # Initialise data
    if args.use_mnist_data:
        task_set, out_of_distribution_task = get_mnist_data(args)
    else:
        task_set, out_of_distribution_task = get_synthetic_data()

    # Initialise meta-learning model
    regulariser_type = models.dinosaur.regularisers.regulariser_names_dict[
        args.regulariser]
    regulariser = regulariser_type(
        error_scale_coefficient=args.error_scale_coefficient,
    )
    # The first two tasks in the task set are used as initialisation tasks
    dinosaur = models.Dinosaur(
        network=network,
        regulariser=regulariser,
        primary_initialisation_task=task_set.task_list[0],
        secondary_initialisation_task=task_set.task_list[1],
    )

    for _ in range(10):
        # Perform one outer-loop iteration of meta-learning
        dinosaur._result.display_headers()
        dinosaur.meta_learn(
            task_set,
            terminator=optimisers.Terminator(i_lim=1),
        )
        # Check that the mean and scale are converging to sensible values
        print(regulariser.mean)
        print(regulariser.parameter_scale)
        print(regulariser.error_scale)
        # Compare adapting to an out-of-distribution task
        dinosaur.fast_adapt(out_of_distribution_task)

    # Plot training curves
    plotting.plot_training_curves([dinosaur._result], dir_name=output_dir)

    # Plot task predictions after meta-learning
    for i, task in enumerate(task_set.task_list):
        print("Plotting adaptations to task %i" % i)
        # Adapt to the task before plotting, so the plots use the adapted
        # network
        dinosaur.fast_adapt(task)
        plotting.plot_2D_regression(
            "Dinosaur task %i" % i,
            output_dir,
            task,
            OUTPUT_DIM,
            model=network,
        )
        plot_hidden_activations(
            task,
            network,
            "Hidden activations for task %i" % i,
            output_dir,
        )

    # Plot adaptation to out of distribution task
    print("Plotting adaptation to out of distribution task")
    dinosaur.fast_adapt(out_of_distribution_task)
    plotting.plot_2D_regression(
        "Dinosaur predictions for out-of-distribution task",
        output_dir,
        out_of_distribution_task,
        OUTPUT_DIM,
        model=network,
    )
    plot_hidden_activations(
        out_of_distribution_task,
        network,
        "Hidden activations for out-of-distribution task",
        output_dir,
    )

    # Plot adaptation to out of distribution task without regularisation
    print("Plotting adaptation without regularisation")
    if isinstance(regulariser, models.dinosaur.regularisers.Eve):
        # Replace the optimiser with plain gradient descent
        ls = optimisers.LineSearch()
        dinosaur._optimiser = optimisers.GradientDescent(ls)
    else:
        # Set the error scale of the network's regulariser to zero
        network._regulariser.error_scale = 0
    # Restart from the regulariser mean before adapting again
    network.set_parameter_vector(regulariser.mean)
    dinosaur.fast_adapt(out_of_distribution_task)
    plotting.plot_2D_regression(
        "Dinosaur predictions for out-of-distribution task without "
        "regularisation",
        output_dir,
        out_of_distribution_task,
        OUTPUT_DIM,
        model=network,
    )
    plot_hidden_activations(
        out_of_distribution_task,
        network,
        "Hidden activations for out-of-distribution task without "
        "regularisation",
        output_dir,
    )
def main(input_dim, output_dim, n_train, t_lim, num_hidden_units, e_lims,
         n_repeats, alpha_smooth, p_c, min_batch_size):
    """
    Train repeated models with gradient descent using a dynamic batch size,
    then plot the training curves, the DBS metric and the batch size.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units
        in each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    -   alpha_smooth: float in (0, 1), amount of smoothing to apply to DBS
        batch size
    -   p_c: passed to DynamicBatchSize as prob_correct_direction
    -   min_batch_size: passed to DynamicBatchSize as min_batch_size
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Data is generated once, with its own fixed seed
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    eval_interval = t_lim / 50
    columns = optimisers.results.columns

    for repeat in range(n_repeats):
        # Each repeat is seeded with its own index
        np.random.seed(repeat)

        network = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ],
        )

        # Result with extra columns for the batch size and the DBS metric
        result = optimisers.Result("Repeat = %i" % repeat)
        batch_getter = optimisers.batch.DynamicBatchSize(
            network,
            sin_data,
            alpha_smooth=alpha_smooth,
            prob_correct_direction=p_c,
            min_batch_size=min_batch_size,
        )
        batch_col = columns.BatchSize(batch_getter)
        dbs_col = columns.DbsMetric()
        result.add_column(batch_col)
        result.add_column(dbs_col)

        # Train for a fixed length of time
        time_limit = optimisers.Terminator(t_lim=t_lim)
        evaluation_schedule = optimisers.Evaluator(t_interval=eval_interval)
        line_search = optimisers.LineSearch()
        result = optimisers.gradient_descent(
            network,
            sin_data,
            terminator=time_limit,
            evaluator=evaluation_schedule,
            result=result,
            line_search=line_search,
            batch_getter=batch_getter,
        )
        results_list.append(result)

    # Suffix shared by all plot names, describing the script parameters
    plot_name_suffix = "".join([
        "\n%iD-%iD data" % (input_dim, output_dim),
        ", %.2g s training time" % t_lim,
        ", %s hidden units" % str(num_hidden_units),
        "\nalpha_smooth = %.3f" % alpha_smooth,
        ", p_c = %.3f" % p_c,
        ", min_batch_size = %.3f" % min_batch_size,
    ])
    # The directory name is the suffix with the newlines removed
    this_test_output_dir = os.path.join(
        output_dir,
        plot_name_suffix.replace("\n", ""),
    )

    # Compare training curves
    plotting.plot_training_curves(
        results_list,
        "DBS learning curves" + plot_name_suffix,
        this_test_output_dir,
        e_lims=e_lims,
    )

    # Plot each extra column against iteration, using the column objects
    # from the final repeat to get the column names and types
    for column in [dbs_col, batch_col]:
        column_plot_name = (
            "%s against iteration for dynamic batch size" % column.name
        )
        plotting.plot_result_attribute(
            column_plot_name + plot_name_suffix,
            this_test_output_dir,
            results_list,
            type(column),
        )
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
         e_lims, n_repeats):
    """
    Compare two network initialisers. For each repeat, one network is
    created with each initialiser and trained with gradient descent for
    t_lim seconds, and the training curves of all experiments are plotted
    together in the "Outputs" directory next to this script.
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Data is generated once, with its own fixed seed
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    eval_interval = t_lim / 50
    results_list = []

    # Experiment names, each paired with a function which creates the
    # initialiser, so that the initialiser is only created when its network
    # is about to be built
    experiments = [
        (
            "Constant pre-activation statistics",
            lambda: models.initialisers.ConstantPreActivationStatistics(
                sin_data.x_train,
                sin_data.y_train,
            ),
        ),
        (
            "Constant parameter statistics",
            lambda: models.initialisers.ConstantParameterStatistics(),
        ),
    ]

    for repeat in range(n_repeats):
        # Each repeat is seeded with its own index
        np.random.seed(repeat)

        for name, make_initialiser in experiments:
            network = models.NeuralNetwork(
                input_dim=input_dim,
                output_dim=output_dim,
                num_hidden_units=num_hidden_units,
                act_funcs=[
                    models.activations.cauchy,
                    models.activations.identity,
                ],
                initialiser=make_initialiser(),
            )
            time_limit = optimisers.Terminator(t_lim=t_lim)
            evaluation_schedule = optimisers.Evaluator(
                t_interval=eval_interval,
            )
            result = optimisers.Result(name=name, verbose=True)
            line_search = optimisers.LineSearch()
            batch_getter = optimisers.batch.ConstantBatchSize(batch_size)
            results_list.append(
                optimisers.gradient_descent(
                    network,
                    sin_data,
                    terminator=time_limit,
                    evaluator=evaluation_schedule,
                    result=result,
                    line_search=line_search,
                    batch_getter=batch_getter,
                )
            )

    # Plots are saved in the "Outputs" directory next to this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    plot_dir = os.path.join(script_dir, "Outputs")

    # Compare training curves
    plotting.plot_training_curves(
        results_list,
        "Comparing initialisers for gradient descent on 2D sinusoidal data",
        plot_dir,
        e_lims=e_lims,
        tp=0.5,
    )
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
         e_lims, n_repeats):
    """
    Compare gradient descent against the generalised Newton method, each
    with and without a line search, starting every experiment of a repeat
    from the same initial parameters, and plot all training curves together.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   batch_size: positive integer batch size to use for training
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units
        in each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Data is generated once, with its own fixed seed
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    eval_interval = t_lim / 50

    # Each experiment: optimisation function, result name, and whether to
    # use a line search
    experiments = [
        (optimisers.gradient_descent, "SGD with line search", True),
        (optimisers.gradient_descent, "SGD without line search", False),
        (optimisers.generalised_newton, "PBGN with line search", True),
        (optimisers.generalised_newton, "PBGN without line search", False),
    ]

    for repeat in range(n_repeats):
        # Each repeat is seeded with its own index
        np.random.seed(repeat)

        # Generate random network and store initial parameters
        network = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ],
        )
        initial_params = network.get_parameter_vector().copy()

        for run_index, experiment in enumerate(experiments):
            optimise, name, use_line_search = experiment
            # The first experiment starts from the freshly created network;
            # later ones are reset to the stored initial parameters
            if run_index > 0:
                network.set_parameter_vector(initial_params)

            time_limit = optimisers.Terminator(t_lim=t_lim)
            evaluation_schedule = optimisers.Evaluator(
                t_interval=eval_interval,
            )
            result = optimisers.Result(name=name, verbose=True)
            if use_line_search:
                line_search = optimisers.LineSearch()
            else:
                line_search = None
            batch_getter = optimisers.batch.ConstantBatchSize(batch_size)
            results_list.append(
                optimise(
                    network,
                    sin_data,
                    terminator=time_limit,
                    evaluator=evaluation_schedule,
                    result=result,
                    line_search=line_search,
                    batch_getter=batch_getter,
                )
            )

    # Plots are saved in the "Outputs" directory next to this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    plot_dir = os.path.join(script_dir, "Outputs")

    # Compare training curves
    plot_name = "".join([
        "Comparing gradient descent vs generalised Newton",
        ", %iD-%iD data" % (input_dim, output_dim),
        ", %.2g s training time" % t_lim,
        ", %s hidden units" % str(num_hidden_units),
    ])
    plotting.plot_training_curves(
        results_list,
        plot_name,
        plot_dir,
        e_lims=e_lims,
    )
# Generate random network and data n = models.NeuralNetwork( input_dim=1, output_dim=1, num_hidden_units=[10], act_funcs=[models.activations.cauchy, models.activations.identity]) sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1) # Call gradient descent function result = optimisers.gradient_descent( n, sin_data, terminator=optimisers.Terminator(t_lim=t_lim), evaluator=optimisers.Evaluator(t_interval=t_interval), result=optimisers.Result(name="SGD with line search", verbose=True), line_search=optimisers.LineSearch()) # Plot predictions x_pred = np.linspace(-2, 2, 200).reshape(1, -1) y_pred = n.forward_prop(x_pred) plotting.plot_1D_regression("Gradient descent predictions for 1D sin data", output_dir, sin_data, x_pred, y_pred) # Plot learning curve plotting.plot_training_curves( [result], "Gradient descent learning curves for 1D sin data", output_dir, e_lims=[0, 0.02])
def main(
    input_dim,
    output_dim,
    n_train,
    num_hidden_units,
    n_repeats,
    n_iters,
    n_plots,
    n_batch_sizes,
    min_batch_size,
    ylims,
    seed,
    batch_size_optimise,
    use_replacement,
    gif_duration
):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   num_hidden_units: list of positive integers, number of hidden units
        in each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   n_repeats: positive integer number of repeats to perform of each
        batch size test
    -   n_iters: total number of iterations to perform
    -   n_plots: number of frames of the gif (equal to how many times
        optimisation will pause in order to sweep over the list of batch
        sizes)
    -   n_batch_sizes: the number of different batch sizes to test for each
        iteration
    -   min_batch_size: the smallest batch size to test. If None, then the
        default of the OptimalBatchSize column is used
    -   ylims: limits for the y-axes of each subplot of the output gif.
        Should be None, in which case the axis limits are calculated
        automatically, or an iterable containing 4 floats, in which the
        first 2 are the lower and upper axis limits for the left subplot,
        and the second 2 are the lower and upper axis limits for the right
        subplot
    -   seed: random seed to use for the experiment
    -   batch_size_optimise: batch size to use for standard optimisation
        iterations (IE not when sweeping over batch sizes). If omitted, then
        the full training set is used as a batch during optimisation
        iterations
    -   use_replacement: if True, then use replacement when sampling batches
        from the training set
    -   gif_duration: time in seconds that the output gif should last for in
        total
    """
    np.random.seed(seed)
    n_iters_per_plot = int(n_iters / n_plots)

    # Initialise model and dataset
    model = models.NeuralNetwork(input_dim, output_dim, num_hidden_units)
    freq = 1 if (input_dim == 1) else None
    sin_data = data.Sinusoidal(input_dim, output_dim, n_train, freq=freq)

    # Initialise objects for optimisation
    result = optimisers.Result()
    evaluator = optimisers.Evaluator(i_interval=n_iters_per_plot)
    terminator = optimisers.Terminator(i_lim=n_iters)
    if batch_size_optimise is None:
        batch_getter = optimisers.batch.FullTrainingSet()
    else:
        batch_getter = optimisers.batch.ConstantBatchSize(
            batch_size_optimise,
            use_replacement
        )
    line_search = optimisers.LineSearch()

    # Initialise OptimalBatchSize column and add to the result object.
    # min_batch_size is only forwarded when it is given, so that passing None
    # keeps the default value of the column
    gd_optimiser = optimisers.GradientDescent(line_search)
    column_kwargs = {
        "n_repeats": n_repeats,
        "n_batch_sizes": n_batch_sizes,
    }
    if min_batch_size is not None:
        column_kwargs["min_batch_size"] = min_batch_size
    optimal_batch_size_col = optimisers.results.columns.OptimalBatchSize(
        gd_optimiser,
        sin_data.n_train,
        **column_kwargs
    )
    result.add_column(optimal_batch_size_col)

    # Get output directory which is specific to the script parameters
    param_str = ", ".join([
        "input_dim = %i" % input_dim,
        "output_dim = %i" % output_dim,
        "n_train = %i" % n_train,
        "n_iters = %i" % n_iters,
        "batch_size_optimise = %r" % batch_size_optimise,
        "use_replacement = %r" % use_replacement,
        "ylims = %r" % ylims,
        "n_plots = %i" % n_plots,
    ])
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Error vs batch",
        param_str
    )

    # Call optimisation function
    gd_optimiser.optimise(
        model,
        sin_data,
        result=result,
        batch_getter=batch_getter,
        terminator=terminator,
        evaluator=evaluator,
    )

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    # Convert the total gif duration in seconds into the duration of each
    # frame in milliseconds
    frame_duration_ms = 1000 * gif_duration / n_plots
    if ylims is None:
        y_lim_left = None
        y_lim_right = None
    else:
        # First 2 values are for the left subplot, the rest for the right
        y_lim_left = ylims[:2]
        y_lim_right = ylims[2:]
    plotting.plot_error_reductions_vs_batch_size_gif(
        result,
        optimal_batch_size_col,
        output_dir,
        y_lim_left=y_lim_left,
        y_lim_right=y_lim_right,
        duration=frame_duration_ms,
        loop=None
    )
    plotting.plot_optimal_batch_sizes(
        "Optimal batch size",
        output_dir,
        result,
        optimal_batch_size_col,
    )