def test_standard_columns():
    """ Check that a Result object created with add_default_columns=True
    records one value per evaluation for every default column type """
    # Seed the RNG, then draw the training-set size and iteration count (the
    # order of these calls fixes which random values each one consumes)
    set_random_seed_from_args("test_standard_columns")
    n_train = np.random.randint(10, 20)
    n_its = np.random.randint(10, 20)
    # Build a random 1D-1D network and a matching sinusoidal data set
    model = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, n_train=n_train)
    # Run gradient descent, logging to an output file, evaluating every
    # iteration so each default column gains one value per iteration
    test_name = "Test standard columns"
    output_filename = "test_standard_columns.txt"
    output_path = os.path.join(output_dir, output_filename)
    with open(output_path, "w") as output_file:
        result = optimisers.Result(
            name=test_name,
            file=output_file,
            add_default_columns=True,
        )
        optimisers.gradient_descent(
            model,
            sin_data,
            result=result,
            terminator=optimisers.Terminator(i_lim=n_its),
            evaluator=optimisers.Evaluator(i_interval=1),
        )
    # Each default column should hold n_its + 1 values, because the model is
    # also evaluated once in its initial state before the first iteration
    expected_num_values = n_its + 1
    for column_type in optimisers.results.DEFAULT_COLUMN_TYPES:
        assert len(result.get_values(column_type)) == expected_num_values
def test_dbs_column():
    """ Exercise a Result object with an added DBS-metric column """
    # Seed the RNG, then draw sizes and build the model and data set (the
    # call order fixes the random state each constructor consumes)
    set_random_seed_from_args("test_dbs_column")
    n_train = np.random.randint(10, 20)
    n_its = np.random.randint(10, 20)
    model = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, n_train=n_train)
    # Run gradient descent with a DbsMetric column attached to the result
    test_name = "Test DBS column"
    output_filename = "test_dbs_columns.txt"
    output_path = os.path.join(output_dir, output_filename)
    with open(output_path, "w") as output_file:
        result = optimisers.Result(
            name=test_name,
            file=output_file,
            add_default_columns=True,
        )
        result.add_column(optimisers.results.columns.DbsMetric())
        # The DBS metric requires a gradient vector to exist before the
        # first evaluation, so compute one up front
        model.get_gradient_vector(sin_data.train.x, sin_data.train.y)
        optimisers.gradient_descent(
            model,
            sin_data,
            result=result,
            terminator=optimisers.Terminator(i_lim=n_its),
            evaluator=optimisers.Evaluator(i_interval=1),
        )
def test_step_size_column():
    """ Exercise a Result object with an added StepSize column, which
    reports the step size chosen by a LineSearch object """
    # Seed the RNG, then draw sizes and build the model and data set (the
    # call order fixes the random state each constructor consumes)
    set_random_seed_from_args("test_step_size_column")
    n_train = np.random.randint(10, 20)
    n_its = np.random.randint(10, 20)
    model = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, n_train=n_train)
    # Run gradient descent with a StepSize column wired to the same
    # LineSearch object that the optimiser uses
    test_name = "Test line search column"
    output_filename = "test_step_size_column.txt"
    output_path = os.path.join(output_dir, output_filename)
    with open(output_path, "w") as output_file:
        line_search = optimisers.LineSearch()
        result = optimisers.Result(
            name=test_name,
            file=output_file,
            add_default_columns=True,
        )
        result.add_column(optimisers.results.columns.StepSize(line_search))
        optimisers.gradient_descent(
            model,
            sin_data,
            result=result,
            terminator=optimisers.Terminator(i_lim=n_its),
            evaluator=optimisers.Evaluator(i_interval=1),
            line_search=line_search,
        )
def test_plot_predictions_gif(dataset_type): """ Test function to make a gif of predictions formed by the model during training, for regression or classification """ # Set random seed and initialise network and dataset set_random_seed_from_args("test_plot_predictions_gif", dataset_type) n_train = np.random.randint(10, 20) n_pred = np.random.randint(5, 10) n_its = 2 if issubclass(dataset_type, data.Regression): input_dim = 1 output_dim = 1 dataset_kwargs = {"x_lo": -1, "x_hi": 1, "freq": 1} elif issubclass(dataset_type, data.Classification): input_dim = 2 output_dim = 3 dataset_kwargs = {} else: raise ValueError( "dataset_type %r must be a subclass of data.Regression or " "data.Classification" % dataset_type) dataset = dataset_type( input_dim=input_dim, output_dim=output_dim, n_train=n_train, **dataset_kwargs, ) model = get_random_network( input_dim=input_dim, output_dim=output_dim, initialiser=models.initialisers.ConstantPreActivationStatistics( x_train=dataset.train.x, y_train=dataset.train.y, ), ) # Initialise Result and Predictions column object result = optimisers.Result(verbose=False) pred_column = optimisers.results.columns.Predictions(dataset, n_pred) result.add_column(pred_column) # Call optimisation function optimisers.gradient_descent( model, dataset, result=result, terminator=optimisers.Terminator(i_lim=n_its), evaluator=optimisers.Evaluator(i_interval=1), line_search=optimisers.LineSearch(), ) # Call plotting function plot_name = ("test_plot_predictions_gif, dataset_type = %s" % dataset_type.__name__) plotting.plot_predictions_gif( plot_name=plot_name, dir_name=os.path.join(output_dir, plot_name), result=result, prediction_column=pred_column, dataset=dataset, output_dim=output_dim, duration=1000, )
def test_plot_1D_hidden_outputs(output_dim): """ Test plotting function for outputs from the hidden layers of a model 1-dimensional inputs, and a variable number of outputs """ # Set random seed and parameters set_random_seed_from_args("test_plot_1D_hidden_outputs", output_dim) n_train = np.random.randint(50, 100) n_test = np.random.randint(50, 100) n_pred = np.random.randint(50, 100) # Initialise data and model sin_data = data.Sinusoidal( input_dim=1, output_dim=output_dim, n_train=n_train, n_test=n_test, x_lo=0, x_hi=1, ) model = get_random_network( input_dim=1, output_dim=output_dim, low=2, high=6, initialiser=models.initialisers.ConstantPreActivationStatistics( x_train=sin_data.train.x, y_train=sin_data.train.y, ), ) # Initialise result and column objects result = optimisers.Result(verbose=False, add_default_columns=False) prediction_column = optimisers.results.columns.Predictions( sin_data, n_points_per_dim=n_pred, store_hidden_layer_outputs=True, ) result.add_column(prediction_column) # Call optimisation function optimisers.gradient_descent( model, sin_data, result=result, terminator=optimisers.Terminator(i_lim=1), ) # Call plotting function under test plotting.plot_1D_hidden_outputs( plot_name="test_plot_1D_hidden_outputs, output_dim=%i" % output_dim, dir_name=os.path.join(output_dir, "test_plot_1D_hidden_outputs"), dataset=sin_data, x_pred=prediction_column.x_pred, y_pred=prediction_column.predictions_dict[0], hidden_output_list=prediction_column.hidden_outputs_dict[0], output_dim=output_dim, )
def make_smudge_plots(objective):
    """
    Make smudge plots for the given objective function, comparing a single
    iteration of each optimiser: gradient descent with and without a line
    search, generalised Newton, and rectified Newton.

    Inputs:
    - objective: objective function object to be passed to smudge_plot and
      minimised by each optimiser
    """
    # Gradient descent with a fixed learning rate (no line search)
    smudge_plot(
        objective,
        lambda f, x: optimisers.gradient_descent(
            f, x, n_iters=1, line_search_flag=False, learning_rate=2e-1
        ),
        name="SGD smudge plot",
    )
    # Gradient descent with a line search, on a coarser smudge grid
    # (removed a stale commented-out call that used n_lines=3)
    smudge_plot(
        objective,
        lambda f, x: optimisers.gradient_descent(
            f, x, n_iters=1, line_search_flag=True, beta=0.99, alpha=0.2
        ),
        name="SGD+LS smudge plot",
        nx0_sm=6,
        nx1_sm=6,
        n_lines=2,
    )
    # Generalised Newton method (learning_rate=0 disables the gradient step)
    smudge_plot(
        objective,
        lambda f, x: optimisers.generalised_newton(
            f, x, n_iters=1, line_search_flag=False, learning_rate=0
        ),
        name="GN smudge plot",
    )
    # Rectified Newton method
    smudge_plot(
        objective,
        lambda f, x: optimisers.rectified_newton(
            f, x, n_iters=1, line_search_flag=False, learning_rate=0
        ),
        name="RN smudge plot",
    )
def test_minimise_reentrant(): """ Test that the AbstractOptimiser.optimise method is re-entrant, IE that the function can return, and be called again, and the columns in the result object are as expected (the output from the result object can be found in the corresponding output file) """ # Set parameters for number of iterations and evaluation frequency n_iters_1 = 23 eval_every_1 = 5 n_iters_2 = 31 eval_every_2 = 3 # Create model and data np.random.seed(6307) model = get_random_network(input_dim=1, output_dim=1) sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1) # Open result file results_filename = "Test AbstractOptimiser.optimise method re-entrant.txt" results_path = os.path.join(output_dir, results_filename) with open(results_path, "w") as results_file: # Create Result object result = optimisers.Result(name="SGD without line search", verbose=True, file=results_file) # Call gradient descent function twice result_ls = optimisers.gradient_descent( model, sin_data, terminator=optimisers.Terminator(i_lim=n_iters_1), evaluator=optimisers.Evaluator(i_interval=eval_every_1), result=result, display_summary=False) result_ls = optimisers.gradient_descent( model, sin_data, terminator=optimisers.Terminator(i_lim=n_iters_2), evaluator=optimisers.Evaluator(i_interval=eval_every_2), result=result) # Check values in time column are monotonically increasing time_values = result.get_values(optimisers.results.columns.Time) for i in range(1, len(time_values)): assert time_values[i] > time_values[i - 1] # Check values in iteration column are monotonically increasing iteration_values = result.get_values(optimisers.results.columns.Iteration) for i in range(1, len(iteration_values)): assert iteration_values[i] > iteration_values[i - 1] # Assert that the list of iteration values is exactly what we expect all_iter_vals = ( list(range(0, n_iters_1, eval_every_1)) + list(range(n_iters_1, n_iters_1 + n_iters_2, eval_every_2)) + [n_iters_1 + n_iters_2]) assert all_iter_vals == 
iteration_values
def test_plot_result_attribute_subplots():
    """
    Test plotting function for plotting the values in multiple columns of a
    Result object over time, with one subplot per column
    """
    np.random.seed(1521)
    n_its = np.random.randint(10, 20)
    n_train = np.random.randint(10, 20)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, n_train=n_train)
    # Run 5 repeats, each with a fresh model and its own result object
    results_list = []
    for i in range(5):
        model = models.NeuralNetwork(input_dim=1, output_dim=1)
        # Initialise the gradient vector before the DBS column is evaluated
        model.get_gradient_vector(sin_data.train.x, sin_data.train.y)
        name = "test_plot_result_attribute_subplots_%i" % (i + 1)
        output_text_filename = os.path.join(output_dir, name + ".txt")
        with open(output_text_filename, "w") as f:
            result = optimisers.Result(name=name, file=f)
            # Add step-size and DBS-metric columns on top of the defaults
            ls = optimisers.LineSearch()
            ls_column = optimisers.results.columns.StepSize(ls)
            dbs_metric_column = optimisers.results.columns.DbsMetric()
            result.add_column(ls_column)
            result.add_column(dbs_metric_column)
            optimisers.gradient_descent(
                model,
                sin_data,
                result=result,
                line_search=ls,
                terminator=optimisers.Terminator(i_lim=n_its),
                evaluator=optimisers.Evaluator(i_interval=1))
        results_list.append(result)
    # Choose which column types get a subplot (ls_column and
    # dbs_metric_column still refer to the objects from the final repeat)
    attribute_list = [
        optimisers.results.columns.TrainError,
        optimisers.results.columns.TestError,
        optimisers.results.columns.Time,
        type(ls_column),
        type(dbs_metric_column)
    ]
    # Call the plotting function under test, with log-scaled axes for the
    # error and step-size subplots
    plotting.plot_result_attributes_subplots(
        "test_plot_result_attribute_subplots",
        output_dir,
        results_list,
        attribute_list,
        marker="o",
        line_style="--",
        log_axes_attributes={
            optimisers.results.columns.TrainError,
            optimisers.results.columns.TestError,
            type(ls_column)
        })
def test_gradient_descent(seed):
    """
    Test gradient descent (no line-search is used, so there is no guarantee
    that each iteration reduces the error function).

    Inputs:
    - seed: integer used to seed the random number generator
    """
    # Set the random seed
    np.random.seed(seed)
    # Generate random number of iterations, network, data, and results file
    n_iters = np.random.randint(10, 20)
    n = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)
    results_filename = "Test gradient descent without line-search.txt"
    results_path = os.path.join(output_dir, results_filename)
    # Use a context manager so the results file is closed even if the
    # optimisation raises (the previous version leaked the handle on error)
    with open(results_path, "w") as results_file:
        result = optimisers.Result(
            name="SGD without line search",
            verbose=True,
            file=results_file)
        # Call gradient descent function (return value is not needed here)
        optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(i_lim=n_iters),
            evaluator=optimisers.Evaluator(i_interval=1),
            result=result)
def run_experiment(
    num_units,
    num_layers,
    log10_s0,
    alpha,
    beta,
    act_func,
    max_steps,
    batch_size,
    batch_replace,
    plot_preds=False,
):
    """
    Train a fresh network on the module-level data set with the given
    hyperparameters and return the final test error.

    NOTE(review): this function reads the module-level names `args`,
    `sin_data` and `output_dir`, which are defined elsewhere in this script.

    Inputs:
    - num_units: number of units in each hidden layer
    - num_layers: number of hidden layers
    - log10_s0: log (base 10) of the line search's initial step size
    - alpha, beta: line search parameters
    - act_func: activation function for the hidden layers
    - max_steps: maximum number of line search iterations
    - batch_size: batch size, or None to use the full training set
    - batch_replace: whether batches are sampled with replacement
    - plot_preds: if True, plot the final model predictions

    Outputs:
    - final_test_error: test error after the last evaluation
    """
    # Initialise network and batch getter
    model = models.NeuralNetwork(
        input_dim=args.input_dim,
        output_dim=args.output_dim,
        num_hidden_units=[num_units for _ in range(num_layers)],
        act_funcs=[act_func, models.activations.identity],
    )
    # Fall back to the full training set when the requested batch size is
    # None or at least as big as the training set
    if (batch_size is None) or (batch_size >= args.n_train):
        batch_getter = optimisers.batch.FullTrainingSet()
    else:
        batch_getter = optimisers.batch.ConstantBatchSize(
            batch_size,
            batch_replace,
        )
    # Perform gradient descent
    result = optimisers.gradient_descent(
        model,
        sin_data,
        terminator=optimisers.Terminator(t_lim=args.t_lim),
        evaluator=optimisers.Evaluator(t_interval=args.t_eval),
        line_search=optimisers.LineSearch(
            s0=pow(10, log10_s0),
            alpha=alpha,
            beta=beta,
            max_its=max_steps,
        ),
        batch_getter=batch_getter,
    )
    # If specified, plot the final model predictions
    if plot_preds:
        print("Plotting final predictions...")
        plotting.plot_data_predictions(
            plot_name="Final predictions",
            dir_name=output_dir,
            dataset=sin_data,
            output_dim=args.output_dim,
            model=model,
        )
    # Return the final test error
    TestError = optimisers.results.columns.TestError
    final_test_error = result.get_values(TestError)[-1]
    return final_test_error
def test_plot_hidden_outputs_gif(): """ Test function to make a gif of hidden layer outputs and predictions formed by the model during training, for regression or classification """ # Set random seed and initialise network and dataset set_random_seed_from_args("test_plot_hidden_outputs_gif") n_train = np.random.randint(10, 20) n_pred = np.random.randint(5, 10) n_its = 2 dataset = data.Sinusoidal(input_dim=1, output_dim=1, n_train=n_train) model = get_random_network( input_dim=1, output_dim=1, initialiser=models.initialisers.ConstantPreActivationStatistics( x_train=dataset.train.x, y_train=dataset.train.y, ), ) # Initialise Result and Predictions column object result = optimisers.Result(verbose=False) pred_column = optimisers.results.columns.Predictions( dataset, n_pred, store_hidden_layer_outputs=True, ) result.add_column(pred_column) # Call optimisation function optimisers.gradient_descent( model, dataset, result=result, terminator=optimisers.Terminator(i_lim=n_its), evaluator=optimisers.Evaluator(i_interval=1), line_search=optimisers.LineSearch(), ) # Call plotting function plot_name = "test_plot_hidden_outputs_gif" plotting.plot_hidden_outputs_gif( plot_name=plot_name, dir_name=os.path.join(output_dir, plot_name), result=result, prediction_column=pred_column, dataset=dataset, output_dim=1, duration=1000, )
def test_batch_improvement_probability_column(smooth_output):
    """
    Test using a column which measures the probability of improvement in each
    consecutive batch.

    Inputs:
    - smooth_output: whether the DynamicTerminator smooths its output
    """
    set_random_seed_from_args(
        "test_batch_improvement_probability_column",
        smooth_output,
    )
    n_train = np.random.randint(10, 20)
    n_its = np.random.randint(50, 100)
    model = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, n_train=n_train)
    # Batch size is drawn strictly smaller than the training set size
    batch_size = np.random.randint(3, sin_data.train.n)
    test_name = (
        "test_batch_improvement_probability_column, smooth_output=%s"
        % smooth_output)
    output_filename = "%s.txt" % test_name
    with open(os.path.join(output_dir, output_filename), "w") as f:
        ls = optimisers.LineSearch()
        result = optimisers.Result(
            name=test_name,
            file=f,
            add_default_columns=True,
        )
        # The DynamicTerminator doubles as the batch getter below, and is
        # also the source for the BatchImprovementProbability column
        dynamic_terminator = optimisers.DynamicTerminator(
            model=model,
            dataset=sin_data,
            batch_size=batch_size,
            smooth_output=smooth_output,
            i_lim=n_its,
        )
        result.add_column(
            optimisers.results.columns.BatchImprovementProbability(
                dynamic_terminator,
            ))
        optimisers.gradient_descent(
            model,
            sin_data,
            result=result,
            terminator=dynamic_terminator,
            batch_getter=dynamic_terminator,
            evaluator=optimisers.Evaluator(i_interval=1),
            line_search=ls,
        )
def test_test_set_improvement_probability_simple_column(
    smoother_type,
    use_cdf,
):
    """
    Test using a column which measures the probability of improvement in the
    test set.

    Inputs:
    - smoother_type: class used to smooth the column's output, or None for
      no smoothing
    - use_cdf: flag passed through to the column under test
    """
    set_random_seed_from_args(
        "test_test_set_improvement_probability_simple_column",
    )
    n_train = np.random.randint(10, 20)
    n_its = np.random.randint(50, 100)
    model = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, n_train=n_train)
    test_name = (
        "test_test_set_improvement_probability_simple_column, "
        "use_cdf=%s, smoother_type=%s"
        % (use_cdf,
        (None if (smoother_type is None) else smoother_type.__name__)))
    output_filename = "%s.txt" % test_name
    with open(os.path.join(output_dir, output_filename), "w") as f:
        ls = optimisers.LineSearch()
        result = optimisers.Result(
            name=test_name,
            file=f,
            add_default_columns=True,
        )
        # Instantiate the smoother (initial value 0) only when a type is given
        smoother = None if (smoother_type is None) else smoother_type(0)
        result.add_column(
            optimisers.results.columns.TestSetImprovementProbabilitySimple(
                model=model,
                dataset=sin_data,
                smoother=smoother,
                use_cdf=use_cdf,
            ))
        optimisers.gradient_descent(
            model,
            sin_data,
            result=result,
            terminator=optimisers.Terminator(i_lim=n_its),
            evaluator=optimisers.Evaluator(i_interval=1),
            line_search=ls,
        )
def compare_optimisers_dimensional_efficiency():
    """
    Compare the dimensional efficiency of several optimisers (gradient
    descent, generalised Newton, block generalised Newton, parallel block
    generalised Newton, and rectified Newton) on a Gaussian objective, each
    with a line search and a 5-second time limit per run.
    """
    # Problem dimensions: 10 log-spaced values from 1 to 1000 (duplicates
    # removed). NB `np.int` was deprecated in NumPy 1.20 and removed in
    # 1.24; the builtin `int` is the documented replacement.
    n_dims_list = np.unique(np.logspace(0, 3, 10, dtype=int))
    optimiser_list = [
        lambda f, x, f_lim: optimisers.gradient_descent(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5),
        lambda f, x, f_lim: optimisers.generalised_newton(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5),
        lambda f, x, f_lim: optimisers.block_generalised_newton(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5),
        lambda f, x, f_lim: optimisers.parallel_block_generalised_newton(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5, block_size=3),
        lambda f, x, f_lim: optimisers.rectified_newton(
            f, x, f_lim=f_lim, line_search_flag=True, n_iters=np.inf,
            t_lim=5),
    ]
    plot_dimensional_efficiency(
        optimiser_list,
        objectives.Gaussian,
        n_dims_list,
        distance_ratio=3,
        name="Dimensional efficiency of different optimisers")
def test_gradient_descent_line_search(seed):
    """
    Test gradient descent, using a line-search. A line-search should guarantee
    that each iteration reduces the error function, so this is tested using
    assert statements after calling the gradient_descent function.

    Inputs:
    - seed: integer used to seed the random number generator
    """
    # Set the random seed
    np.random.seed(seed)
    # Generate random number of iterations, network, data, and results file
    n_iters = np.random.randint(10, 20)
    n = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)
    results_filename = (
        "Test gradient descent with line-search, seed = %i.txt" % seed)
    results_path = os.path.join(output_dir, results_filename)
    # Use a context manager so the results file is closed even if one of the
    # assertions below fails (the previous version leaked the handle then)
    with open(results_path, "w") as results_file:
        result = optimisers.Result(
            name="SGD with line search",
            verbose=True,
            file=results_file)
        # Add step size column to result; max_its is effectively unbounded
        # so the line search never gives up early
        ls = optimisers.LineSearch(max_its=int(1e10))
        result.add_column(optimisers.results.columns.StepSize(ls))
        # Call gradient descent function (return value is not needed here)
        optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(i_lim=n_iters),
            evaluator=optimisers.Evaluator(i_interval=1),
            line_search=ls,
            result=result)
        # Make sure each iteration reduces the training error
        train_error_list = result.get_values(
            optimisers.results.columns.TrainError)
        for i in range(len(train_error_list) - 1):
            assert train_error_list[i + 1] < train_error_list[i]
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for this script, wrapped by argparse for command-line
    arguments. Compares two initialisers (constant pre-activation statistics
    vs constant parameter statistics) over n_repeats random networks, and
    plots the resulting training curves.
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()
    # Initialise data, time limit, and results list
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train)
    # Evaluate 50 times over the allotted training time
    t_interval = t_lim / 50
    results_list = []
    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network
        # NOTE(review): this uses sin_data.x_train/y_train, whereas other
        # functions in this file use sin_data.train.x/train.y — confirm
        # which attribute layout this data class actually exposes
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.cauchy,
                models.activations.identity],
            initialiser=models.initialisers.ConstantPreActivationStatistics(
                sin_data.x_train,
                sin_data.y_train))
        # Set name for experiment
        name = "Constant pre-activation statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result)
        # Generate random network with the alternative initialiser
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.cauchy,
                models.activations.identity],
            initialiser=models.initialisers.ConstantParameterStatistics())
        # Set name for experiment
        name = "Constant parameter statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result)
    # Get name of output directory, relative to this script's location
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")
    # Compare training curves
    plotting.plot_training_curves(
        results_list,
        "Comparing initialisers for gradient descent on 2D sinusoidal data",
        output_dir,
        e_lims=e_lims,
        tp=0.5)
# Script-level experiment: compare SGD with and without a line search over
# three seeds, restoring the initial parameters between the two runs so both
# optimisers start from the same point.
# NOTE(review): `n_iters` and `sin_data` are defined earlier, outside this
# excerpt
eval_every = n_iters // 20
results_list = []
for seed in [2295, 6997, 7681]:
    # Set the random seed
    np.random.seed(seed)
    # Generate random network and store initial parameters (copy, so the
    # stored vector is not mutated by training)
    n = NeuralNetwork(
        1, 1, [10], [activations.Gaussian(), activations.Identity()])
    w0 = n.get_parameter_vector().copy()
    # Call gradient descent function
    result_ls = optimisers.gradient_descent(
        n,
        sin_data,
        n_iters=n_iters,
        eval_every=eval_every,
        verbose=True,
        name="SGD with line search",
        line_search_flag=True)
    results_list.append(result_ls)
    # Try again without line search, from the same initial parameters
    n.set_parameter_vector(w0)
    result_no_ls = optimisers.gradient_descent(
        n,
        sin_data,
        n_iters=n_iters,
        eval_every=eval_every,
        verbose=True,
        name="SGD without line search",
        line_search_flag=False)
    results_list.append(result_no_ls)
def test_predictions_column(
    input_dim,
    output_dim,
    store_hidden,
    store_preactivations,
):
    """
    Test using a column which stores model predictions during training.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - store_hidden: whether the column stores hidden-layer outputs
    - store_preactivations: whether the column stores hidden-layer
      pre-activations
    """
    # Set random seed and initialise network and dataset
    set_random_seed_from_args(
        "test_predictions_column",
        input_dim,
        output_dim,
        store_hidden,
    )
    n_train = np.random.randint(10, 20)
    # Number of prediction points per dimension, chosen so the full grid has
    # roughly 5-10 points in total regardless of input_dim
    n_pred = ceil(pow(np.random.randint(5, 10), 1 / input_dim))
    n_its = np.random.randint(10, 20)
    model = get_random_network(input_dim=input_dim, output_dim=output_dim)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    # Initialise output file and Result object
    test_name = "test_predictions_column, %id-%id data, store_hidden=%s" % (
        input_dim,
        output_dim,
        store_hidden,
    )
    output_filename = "%s.txt" % test_name
    with open(os.path.join(output_dir, output_filename), "w") as f:
        # Initialise result object
        result = optimisers.Result(
            name=test_name,
            file=f,
            add_default_columns=True)
        # Initialise column object and add to the result
        columns = optimisers.results.columns
        prediction_column = columns.Predictions(
            sin_data,
            n_points_per_dim=n_pred,
            store_hidden_layer_outputs=store_hidden,
            store_hidden_layer_preactivations=store_preactivations,
        )
        result.add_column(prediction_column)
        # Call optimisation function
        optimisers.gradient_descent(
            model,
            sin_data,
            result=result,
            terminator=optimisers.Terminator(i_lim=n_its),
            evaluator=optimisers.Evaluator(i_interval=1),
        )
        # Print Predictions column attributes to file
        print("\n\nx_pred:", prediction_column.x_pred, sep="\n", file=f)
        iter_list = result.get_values(columns.Iteration)
        print("\n\nPredictions:", file=f)
        for i in iter_list:
            print("i = %i:" % i, file=f)
            print(prediction_column.predictions_dict[i], file=f)
        if store_hidden:
            print("\n\nHidden layer outputs:", file=f)
            for i in iter_list:
                print(
                    "\ni = %i:" % i,
                    *prediction_column.hidden_outputs_dict[i],
                    file=f,
                    sep="\n\n",
                )
    # Test that the Prediction object attributes are as expected: the
    # prediction grid has n_pred points per input dimension
    n_pred_grid = pow(n_pred, input_dim)
    assert prediction_column.x_pred.shape == (input_dim, n_pred_grid)
    # One stored prediction per evaluated iteration, each of the right shape
    iter_set = set(iter_list)
    assert set(prediction_column.predictions_dict.keys()) == iter_set
    for y_pred in prediction_column.predictions_dict.values():
        assert y_pred.shape == (output_dim, n_pred_grid)
    # Hidden-layer outputs are stored per iteration (one array per hidden
    # layer, IE all layers except the output layer) only when requested
    hidden_outputs_dict = prediction_column.hidden_outputs_dict
    if store_hidden:
        assert set(hidden_outputs_dict.keys()) == iter_set
        for hidden_output_list in hidden_outputs_dict.values():
            assert len(hidden_output_list) == len(model.layers) - 1
            for i, hidden_output in enumerate(hidden_output_list):
                expected_shape = (model.layers[i].output_dim, n_pred_grid)
                assert hidden_output.shape == expected_shape
    else:
        assert len(hidden_outputs_dict) == 0
# Create result object and add columns for iteration and DBS result = optimisers.Result( name="SGD with line search", verbose=True, add_default_columns=False ) i_column = optimisers.results.columns.Iteration() dbs_column = optimisers.results.columns.DbsMetric() result.add_column(i_column) result.add_column(dbs_column) # Call gradient descent function model.get_gradient_vector(sin_data.x_train, sin_data.y_train) result = optimisers.gradient_descent( model, sin_data, terminator=optimisers.Terminator(i_lim=i_lim), evaluator=optimisers.Evaluator(i_interval=i_interval), result=result, line_search=optimisers.LineSearch(), batch_getter=optimisers.batch.ConstantBatchSize(50) ) # Plot DBS metric vs iteration plotting.plot_result_attribute( "Gradient descent DBS metric for 2D-%iD sinusoid" % output_dim, output_dir, [result], type(dbs_column) )
def main(input_dim, output_dim, n_train, t_lim, num_hidden_units, e_lims,
        n_repeats, alpha_smooth, p_c, min_batch_size):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - n_train: positive integer number of points in the training set
    - t_lim: positive float, length of time to train for each experiment
    - num_hidden_units: list of positive integers, number of hidden units in
      each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    - e_lims: list of 2 floats, used as axis limits in the output plots
    - n_repeats: positive integer number of repeats to perform of each
      experiment
    - alpha_smooth: float in (0, 1), amount of smoothing to apply to DBS
      batch size
    - p_c: probability threshold passed to the dynamic batch-size getter
    - min_batch_size: lower bound for the dynamic batch size
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()
    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    # Evaluate 50 times over the allotted training time
    t_interval = t_lim / 50
    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network
        model = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity
            ])
        # Call gradient descent function, with columns tracking the dynamic
        # batch size and the DBS metric
        result = optimisers.Result("Repeat = %i" % i)
        batch_getter = optimisers.batch.DynamicBatchSize(
            model,
            sin_data,
            alpha_smooth=alpha_smooth,
            prob_correct_direction=p_c,
            min_batch_size=min_batch_size)
        batch_col = optimisers.results.columns.BatchSize(batch_getter)
        dbs_col = optimisers.results.columns.DbsMetric()
        result.add_column(batch_col)
        result.add_column(dbs_col)
        result = optimisers.gradient_descent(
            model,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=result,
            line_search=optimisers.LineSearch(),
            batch_getter=batch_getter)
        results_list.append(result)
    # Compare training curves; encode the experiment parameters in the plot
    # names and the per-experiment output directory
    plot_name_suffix = "\n%iD-%iD data" % (input_dim, output_dim)
    plot_name_suffix += ", %.2g s training time" % t_lim
    plot_name_suffix += ", %s hidden units" % str(num_hidden_units)
    plot_name_suffix += "\nalpha_smooth = %.3f" % alpha_smooth
    plot_name_suffix += ", p_c = %.3f" % p_c
    plot_name_suffix += ", min_batch_size = %.3f" % min_batch_size
    this_test_output_dir = os.path.join(
        output_dir,
        plot_name_suffix.replace("\n", ""))
    plotting.plot_training_curves(
        results_list,
        "DBS learning curves" + plot_name_suffix,
        this_test_output_dir,
        e_lims=e_lims)
    # Plot the batch-size and DBS-metric columns against iteration
    # (batch_col and dbs_col refer to the objects from the final repeat)
    for col in [dbs_col, batch_col]:
        plot_name = "%s against iteration for dynamic batch size" % col.name
        plot_name += plot_name_suffix
        plotting.plot_result_attribute(
            plot_name,
            this_test_output_dir,
            results_list,
            type(col))
def main(args):
    """
    Main function for the script: train a neural network on the specified
    dataset with gradient descent (optionally with a line search and/or a
    dynamic terminator), repeating args.n_repeats times, then produce output
    plots: training curves, improvement-probability subplots, and optionally
    final predictions and gifs of predictions/hidden layers during training.

    Inputs:
    - args: object containing modified command line arguments as attributes

    Raises:
    - ValueError: if args.dataset_type is not a regression, binary
      classification, or multi-class classification data set
    """
    np.random.seed(args.seed)

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "d%s" % args.dataset_type.__name__[:3],
        "i%s" % args.input_dim,
        "o%s" % args.output_dim,
        "t%s" % args.t_lim,
        "n%s" % args.n_train,
        "b%s" % args.batch_size,
        "u%s" % args.num_hidden_units,
    ])
    if args.dynamic_terminator:
        dt_str = "dyn"
        if args.dt_smooth_output:
            dt_str += "sOut"
        if args.dt_smooth_mrse:
            dt_str += "sMrStd"
        param_str += " %s%i" % (dt_str, args.dt_buffer_length)
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train gradient descent",
        param_str,
    )
    # exist_ok=True avoids the race between checking for and creating the
    # directory (and is simpler than a separate isdir check)
    os.makedirs(output_dir, exist_ok=True)

    # Perform warmup experiment
    optimisers.warmup()

    # Initialise lists of objects that will be stored for each repeat
    result_list = []
    model_list = []
    prediction_column_list = []

    # Initialise dataset object and corresponding error function. Binary
    # classification datasets fix their own output dimension, so output_dim
    # is only forwarded for the other dataset types
    dataset_kwargs = {
        "input_dim": args.input_dim,
        "n_train": args.n_train,
    }
    if not issubclass(args.dataset_type, data.BinaryClassification):
        dataset_kwargs["output_dim"] = args.output_dim
    dataset = args.dataset_type(**dataset_kwargs)
    if isinstance(dataset, data.Regression):
        error_func = models.errors.sum_of_squares
        act_funcs = None
        print("Using regression data set with sum of squares error function")
    elif isinstance(dataset, data.BinaryClassification):
        # NOTE: BinaryClassification is checked before Classification because
        # of the subclass relationship between the dataset types — TODO
        # confirm BinaryClassification subclasses Classification
        error_func = models.errors.binary_cross_entropy
        act_funcs = [models.activations.gaussian, models.activations.logistic]
        print("Using binary classification data set with binary cross-entropy "
            "error function, and logistic activation function in the output "
            "layer")
    elif isinstance(dataset, data.Classification):
        error_func = models.errors.softmax_cross_entropy
        act_funcs = None
        print("Using classification data set with softmax cross entropy error "
            "function")
    else:
        raise ValueError(
            "Data set must be either a binary-classification, multi-class "
            "classification or regression data set")

    # Iterate through repeats
    for i in range(args.n_repeats):
        # Initialise model and Result object
        model = models.NeuralNetwork(
            input_dim=args.input_dim,
            output_dim=args.output_dim,
            num_hidden_units=args.num_hidden_units,
            error_func=error_func,
            act_funcs=act_funcs,
        )
        result = optimisers.Result(name="Repeat %i" % (i + 1))
        if args.line_search is not None:
            # The column is stored on args so the same attribute is available
            # after the loop — TODO confirm nothing else relies on this
            args.line_search_col = columns.StepSize(args.line_search)
            result.add_column(args.line_search_col)
        if args.plot_pred_gif or args.plot_hidden_gif:
            pred_column = columns.Predictions(
                dataset=dataset,
                store_hidden_layer_outputs=args.plot_hidden_gif,
                store_hidden_layer_preactivations=(
                    args.plot_hidden_preactivations_gif),
            )
            result.add_column(pred_column)
        if args.plot_test_set_improvement_probability:
            test_set_improvement_column = (
                columns.TestSetImprovementProbabilitySimple(
                    model,
                    dataset,
                    smoother=optimisers.smooth.MovingAverage(1, n=10),
                ))
            result.add_column(test_set_improvement_column)
        if args.dynamic_terminator:
            # The dynamic terminator doubles as the batch getter, so it can
            # decide both when to stop and which batch to use next
            dynamic_terminator = optimisers.DynamicTerminator(
                model=model,
                dataset=dataset,
                batch_size=args.batch_size,
                replace=False,
                t_lim=args.t_lim,
                smooth_n=args.dt_buffer_length,
                smooth_x0=args.dt_x0,
                smooth_output=args.dt_smooth_output,
                smooth_mean_reduction=args.dt_smooth_mrse,
                smooth_std=args.dt_smooth_mrse,
            )
            terminator = dynamic_terminator
            batch_getter = dynamic_terminator
            dynamic_terminator_column = columns.BatchImprovementProbability(
                dynamic_terminator,
            )
            result.add_column(dynamic_terminator_column)
        else:
            terminator = optimisers.Terminator(t_lim=args.t_lim)
            batch_getter = optimisers.batch.ConstantBatchSize(
                args.batch_size,
                True,
            )

        # Perform gradient descent
        optimisers.gradient_descent(
            model,
            dataset,
            line_search=args.line_search,
            result=result,
            evaluator=optimisers.Evaluator(t_interval=args.t_eval),
            terminator=terminator,
            batch_getter=batch_getter,
        )

        # Store results
        result_list.append(result)
        model_list.append(model)
        if args.plot_pred_gif or args.plot_hidden_gif:
            prediction_column_list.append(pred_column)

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    # NOTE(review): Windows-specific, and builds a shell command by string
    # interpolation; consider subprocess.run with a list argument, or at
    # least guarding on the platform
    os.system("explorer \"%s\"" % output_dir)
    print("Plotting training curves...")
    plotting.plot_training_curves(
        result_list,
        dir_name=output_dir,
        e_lims=args.error_lims,
    )
    if args.plot_test_set_improvement_probability or args.dynamic_terminator:
        attribute_list = [
            columns.TrainError,
            columns.TestError,
            columns.StepSize,
        ]
        if args.plot_test_set_improvement_probability:
            print("Plotting test set improvement probability...")
            attribute_list.append(columns.TestSetImprovementProbabilitySimple)
        if args.dynamic_terminator:
            print("Plotting batch improvement probability...")
            attribute_list.append(columns.BatchImprovementProbability)
        plotting.plot_result_attributes_subplots(
            plot_name="Improvement probability\n%s" % param_str,
            dir_name=output_dir,
            result_list=result_list,
            attribute_list=attribute_list,
            log_axes_attributes=[columns.StepSize],
            iqr_axis_scaling=True,
        )
    # Per-repeat plots: final predictions and optional training gifs
    for i, model in enumerate(model_list):
        output_dir_repeat = os.path.join(output_dir, "Repeat %i" % (i + 1))
        if args.plot_preds:
            print("Plotting final predictions...")
            plotting.plot_data_predictions(
                plot_name="Final predictions",
                dir_name=output_dir_repeat,
                dataset=dataset,
                output_dim=args.output_dim,
                model=model,
            )
        if args.plot_pred_gif:
            print("Plotting gif of predictions during training...")
            plotting.plot_predictions_gif(
                plot_name="Model predictions during training",
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                # frame duration in ms; t_eval is in seconds
                duration=args.t_eval * 1000,
            )
        if args.plot_hidden_gif:
            print("Plotting gif of hidden layers during training...")
            if args.plot_hidden_preactivations_gif:
                plot_name = "Hidden layer preactivations during training"
            else:
                plot_name = "Hidden layer outputs during training"
            plotting.plot_hidden_outputs_gif(
                plot_name=plot_name,
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
# (Continuation of a data.Sinusoidal(...) call whose opening is above this
# chunk; x_lo/x_hi presumably set the input range of the dataset — TODO
# confirm against the constructor)
x_lo=x_lo, x_hi=x_hi
)
# Initialise a network with two hidden layers of 20 units each, Cauchy
# hidden activations and an identity output activation
model = models.NeuralNetwork(
    input_dim=input_dim,
    output_dim=output_dim,
    num_hidden_units=[20, 20],
    act_funcs=[models.activations.cauchy, models.activations.identity]
)
# Call gradient descent function, with a line search and a constant batch
# size of 50, evaluating every t_interval seconds until t_lim seconds elapse
result = optimisers.gradient_descent(
    model,
    sin_data,
    terminator=optimisers.Terminator(t_lim=t_lim),
    evaluator=optimisers.Evaluator(t_interval=t_interval),
    result=optimisers.Result(name="SGD with line search", verbose=True),
    line_search=optimisers.LineSearch(),
    batch_getter=optimisers.batch.ConstantBatchSize(50)
)
# Plot predictions over a grid spanning [x_lo, x_hi] in both input dimensions
x_pred_0 = x_pred_1 = np.linspace(x_lo, x_hi)
plotting.plot_2D_nD_regression(
    "Gradient descent predictions for 2D-%iD sinusoid" % output_dim,
    output_dir,
    n_output_dims=output_dim,
    dataset=sin_data,
    x_pred_0=x_pred_0,
    x_pred_1=x_pred_1,
    model
    # (call continues past the end of this chunk)
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for the script: compare gradient descent (SGD) against the
    parallel-block generalised Newton method (PBGN), each with and without a
    line search. Every optimiser in a given repeat starts from the same
    initial parameter vector, so the comparison of training curves is fair.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - n_train: positive integer number of points in the training set
    - batch_size: positive integer batch size to use for training
    - t_lim: positive float, length of time to train for each experiment
    - num_hidden_units: list of positive integers, number of hidden units in
      each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    - e_lims: list of 2 floats, used as axis limits in the output plots
    - n_repeats: positive integer number of repeats to perform of each
      experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    # One entry per experiment: (optimisation function, result name, whether
    # to use a line search). The four nearly identical call stanzas are
    # expressed as a single data-driven loop below
    experiment_list = [
        (optimisers.gradient_descent, "SGD with line search", True),
        (optimisers.gradient_descent, "SGD without line search", False),
        (optimisers.generalised_newton, "PBGN with line search", True),
        (optimisers.generalised_newton, "PBGN without line search", False),
    ]

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network and store initial parameters
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ],
        )
        w0 = n.get_parameter_vector().copy()
        for optimiser_func, name, use_line_search in experiment_list:
            # Reset to the same initial parameters for a fair comparison
            # (a no-op for the first experiment, which starts at w0 anyway).
            # A fresh LineSearch is built per experiment so no line-search
            # state leaks between runs
            n.set_parameter_vector(w0)
            result = optimiser_func(
                n,
                sin_data,
                terminator=optimisers.Terminator(t_lim=t_lim),
                evaluator=optimisers.Evaluator(t_interval=t_interval),
                result=optimisers.Result(name=name, verbose=True),
                line_search=(
                    optimisers.LineSearch() if use_line_search else None),
                batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
            )
            results_list.append(result)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plot_name = "Comparing gradient descent vs generalised Newton"
    plot_name += ", %iD-%iD data" % (input_dim, output_dim)
    plot_name += ", %.2g s training time" % t_lim
    plot_name += ", %s hidden units" % str(num_hidden_units)
    plotting.plot_training_curves(
        results_list,
        plot_name,
        output_dir,
        e_lims=e_lims,
    )
# Set the random seed np.random.seed(2865) # Generate random network and data n = models.NeuralNetwork( input_dim=1, output_dim=1, num_hidden_units=[10], act_funcs=[models.activations.cauchy, models.activations.identity]) sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1) # Call gradient descent function result = optimisers.gradient_descent( n, sin_data, terminator=optimisers.Terminator(t_lim=t_lim), evaluator=optimisers.Evaluator(t_interval=t_interval), result=optimisers.Result(name="SGD with line search", verbose=True), line_search=optimisers.LineSearch()) # Plot predictions x_pred = np.linspace(-2, 2, 200).reshape(1, -1) y_pred = n.forward_prop(x_pred) plotting.plot_1D_regression("Gradient descent predictions for 1D sin data", output_dir, sin_data, x_pred, y_pred) # Plot learning curve plotting.plot_training_curves( [result], "Gradient descent learning curves for 1D sin data", output_dir,
# (Continuation of a models.NeuralNetwork(...) call whose opening is above
# this chunk: a 2D-input network with two hidden layers of 20 Cauchy units)
input_dim=2,
output_dim=output_dim,
num_hidden_units=[20, 20],
act_funcs=[models.activations.cauchy, models.activations.identity])
# Store the initial parameters so every experiment starts from the same point
w0 = n.get_parameter_vector().copy()
# Iterate through constant size batch-getters
for batch_size in batch_size_list:
    # Set name for experiment; zero-padded so names sort consistently
    name = "Batch size = {:04d}".format(int(batch_size))
    # Reset parameter vector
    n.set_parameter_vector(w0)
    # Call gradient descent function with this constant batch size
    result = optimisers.gradient_descent(
        n,
        sin_data,
        terminator=optimisers.Terminator(t_lim=t_lim),
        evaluator=optimisers.Evaluator(t_interval=t_interval),
        result=optimisers.Result(name=name, verbose=True),
        line_search=optimisers.LineSearch(),
        batch_getter=optimisers.batch.ConstantBatchSize(int(batch_size)))
    results_list.append(result)
# Try again with full training set
n.set_parameter_vector(w0)
result = optimisers.gradient_descent(
    n,
    sin_data,
    terminator=optimisers.Terminator(t_lim=t_lim),
    evaluator=optimisers.Evaluator(t_interval=t_interval),
    line_search=optimisers.LineSearch(),
    batch_getter=optimisers.batch.FullTrainingSet(),
    result=optimisers.Result(name="Full training set", verbose=True),
    # (call continues past the end of this chunk)