def test_plot_training_curves():
    np.random.seed(79)
    n_models = np.random.randint(2, 5)
    results_list = []
    for j in range(n_models):
        n_iters = np.random.randint(10, 20)
        output_dim = np.random.randint(2, 5)
        n = get_random_network(input_dim=2, output_dim=output_dim)
        d = data.Sinusoidal(input_dim=2, output_dim=output_dim, n_train=150)
        w = n.get_parameter_vector()
        result = optimisers.Result(name="Network {}".format(j))
        result.begin()
        # Call the result.update method a few times
        for i in range(n_iters):
            n.set_parameter_vector(w + i)
            result.update(model=n, dataset=d, iteration=i)
        results_list.append(result)

    plotting.plot_training_curves(
        results_list,
        "Test plot_training_curves",
        output_dir,
        e_lims=None,
        n_iqr=1,
    )
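# The test above relies on a get_random_network helper that is not shown in
# this section. As an illustrative sketch only (the hidden-layer sizing and
# activation functions are assumptions, not the repository's actual helper),
# it could be written along these lines, consistent with how networks are
# constructed in the other snippets:
def get_random_network(input_dim, output_dim):
    """Return a NeuralNetwork with one randomly sized hidden layer (sketch)"""
    num_hidden_units = [np.random.randint(5, 15)]
    return models.NeuralNetwork(
        input_dim=input_dim,
        output_dim=output_dim,
        num_hidden_units=num_hidden_units,
        act_funcs=[models.activations.gaussian, models.activations.identity],
    )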
# Generate random network and data
n = models.NeuralNetwork(
    input_dim=1,
    output_dim=1,
    num_hidden_units=[10],
    act_funcs=[models.activations.cauchy, models.activations.identity])
sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)

# Call gradient descent function
result = optimisers.gradient_descent(
    n,
    sin_data,
    terminator=optimisers.Terminator(t_lim=t_lim),
    evaluator=optimisers.Evaluator(t_interval=t_interval),
    result=optimisers.Result(name="SGD with line search", verbose=True),
    line_search=optimisers.LineSearch())

# Plot predictions
x_pred = np.linspace(-2, 2, 200).reshape(1, -1)
y_pred = n.forward_prop(x_pred)
plotting.plot_1D_regression(
    "Gradient descent predictions for 1D sin data",
    output_dir,
    sin_data,
    x_pred,
    y_pred)

# Plot learning curve
plotting.plot_training_curves(
    [result],
    "Gradient descent learning curves for 1D sin data",
    output_dir,
    e_lims=[0, 0.02])
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for this script, wrapped by argparse for command-line
    arguments.
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, time limit, and results list
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train)
    t_interval = t_lim / 50
    results_list = []

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[models.activations.cauchy, models.activations.identity],
            initialiser=models.initialisers.ConstantPreActivationStatistics(
                sin_data.x_train,
                sin_data.y_train))
        # Set name for experiment
        name = "Constant pre-activation statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result)

        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[models.activations.cauchy, models.activations.identity],
            initialiser=models.initialisers.ConstantParameterStatistics())
        # Set name for experiment
        name = "Constant parameter statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plotting.plot_training_curves(
        results_list,
        "Comparing initialisers for gradient descent on 2D sinusoidal data",
        output_dir,
        e_lims=e_lims,
        tp=0.5)
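# The docstring above says main is wrapped by argparse; the wrapper itself is
# not shown here. A minimal sketch of what such a wrapper could look like is
# given below. The flag names and default values are illustrative assumptions
# only, not the repository's actual command-line interface.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Compare initialisers for gradient descent")
    parser.add_argument("--input_dim", type=int, default=2)
    parser.add_argument("--output_dim", type=int, default=3)
    parser.add_argument("--n_train", type=int, default=500)
    parser.add_argument("--batch_size", type=int, default=50)
    parser.add_argument("--t_lim", type=float, default=5.0)
    parser.add_argument("--num_hidden_units", type=int, nargs="+", default=[10])
    parser.add_argument("--e_lims", type=float, nargs=2, default=[0.0, 0.5])
    parser.add_argument("--n_repeats", type=int, default=3)
    args = parser.parse_args()

    main(
        input_dim=args.input_dim,
        output_dim=args.output_dim,
        n_train=args.n_train,
        batch_size=args.batch_size,
        t_lim=args.t_lim,
        num_hidden_units=args.num_hidden_units,
        e_lims=args.e_lims,
        n_repeats=args.n_repeats,
    )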
# Generate random network and store initial parameters
n = NeuralNetwork(1, 1, [10], [activations.Gaussian(), activations.Identity()])
w0 = n.get_parameter_vector().copy()

# Call gradient descent function
result_ls = optimisers.gradient_descent(
    n,
    sin_data,
    n_iters=n_iters,
    eval_every=eval_every,
    verbose=True,
    name="SGD with line search",
    line_search_flag=True)
results_list.append(result_ls)

# Try again without line search
n.set_parameter_vector(w0)
result_no_ls = optimisers.gradient_descent(
    n,
    sin_data,
    n_iters=n_iters,
    eval_every=eval_every,
    verbose=True,
    name="SGD without line search",
    line_search_flag=False)
results_list.append(result_no_ls)

# Compare training curves
plotting.plot_training_curves(
    results_list,
    "Comparing line-search vs no line-search",
    output_dir,
    e_lims=[0, 0.2])
def main(args):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - args: object containing modified command line arguments as attributes
    """
    np.random.seed(args.seed)

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "d%s" % args.dataset_type.__name__[:3],
        "i%s" % args.input_dim,
        "o%s" % args.output_dim,
        "t%s" % args.t_lim,
        "n%s" % args.n_train,
        "b%s" % args.batch_size,
        "u%s" % args.num_hidden_units,
    ])
    if args.dynamic_terminator:
        dt_str = "dyn"
        if args.dt_smooth_output:
            dt_str += "sOut"
        if args.dt_smooth_mrse:
            dt_str += "sMrStd"
        param_str += " %s%i" % (dt_str, args.dt_buffer_length)
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train gradient descent",
        param_str,
    )
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Perform warmup experiment
    optimisers.warmup()

    # Initialise lists of objects that will be stored for each repeat
    result_list = []
    model_list = []
    prediction_column_list = []

    # Initialise dataset object and corresponding error function
    dataset_kwargs = {
        "input_dim": args.input_dim,
        "n_train": args.n_train,
    }
    if not issubclass(args.dataset_type, data.BinaryClassification):
        dataset_kwargs["output_dim"] = args.output_dim
    dataset = args.dataset_type(**dataset_kwargs)
    if isinstance(dataset, data.Regression):
        error_func = models.errors.sum_of_squares
        act_funcs = None
        print("Using regression data set with sum of squares error function")
    elif isinstance(dataset, data.BinaryClassification):
        error_func = models.errors.binary_cross_entropy
        act_funcs = [models.activations.gaussian, models.activations.logistic]
        print("Using binary classification data set with binary cross-entropy "
            "error function, and logistic activation function in the output "
            "layer")
    elif isinstance(dataset, data.Classification):
        error_func = models.errors.softmax_cross_entropy
        act_funcs = None
        print("Using classification data set with softmax cross entropy error "
            "function")
    else:
        raise ValueError(
            "Data set must be either a binary-classification, multi-class "
            "classification or regression data set")

    # Iterate through repeats
    for i in range(args.n_repeats):
        # Initialise model and Result object
        model = models.NeuralNetwork(
            input_dim=args.input_dim,
            output_dim=args.output_dim,
            num_hidden_units=args.num_hidden_units,
            error_func=error_func,
            act_funcs=act_funcs,
        )
        result = optimisers.Result(name="Repeat %i" % (i + 1))
        if args.line_search is not None:
            args.line_search_col = columns.StepSize(args.line_search)
            result.add_column(args.line_search_col)
        if args.plot_pred_gif or args.plot_hidden_gif:
            pred_column = columns.Predictions(
                dataset=dataset,
                store_hidden_layer_outputs=args.plot_hidden_gif,
                store_hidden_layer_preactivations=(
                    args.plot_hidden_preactivations_gif),
            )
            result.add_column(pred_column)
        if args.plot_test_set_improvement_probability:
            test_set_improvement_column = (
                columns.TestSetImprovementProbabilitySimple(
                    model,
                    dataset,
                    smoother=optimisers.smooth.MovingAverage(1, n=10),
                ))
            result.add_column(test_set_improvement_column)
        if args.dynamic_terminator:
            dynamic_terminator = optimisers.DynamicTerminator(
                model=model,
                dataset=dataset,
                batch_size=args.batch_size,
                replace=False,
                t_lim=args.t_lim,
                smooth_n=args.dt_buffer_length,
                smooth_x0=args.dt_x0,
                smooth_output=args.dt_smooth_output,
                smooth_mean_reduction=args.dt_smooth_mrse,
                smooth_std=args.dt_smooth_mrse,
            )
            terminator = dynamic_terminator
            batch_getter = dynamic_terminator
            dynamic_terminator_column = columns.BatchImprovementProbability(
                dynamic_terminator,
            )
            result.add_column(dynamic_terminator_column)
        else:
            terminator = optimisers.Terminator(t_lim=args.t_lim)
            batch_getter = optimisers.batch.ConstantBatchSize(
                args.batch_size,
                True,
            )

        # Perform gradient descent
        optimisers.gradient_descent(
            model,
            dataset,
            line_search=args.line_search,
            result=result,
            evaluator=optimisers.Evaluator(t_interval=args.t_eval),
            terminator=terminator,
            batch_getter=batch_getter,
        )

        # Store results
        result_list.append(result)
        model_list.append(model)
        if args.plot_pred_gif or args.plot_hidden_gif:
            prediction_column_list.append(pred_column)

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    os.system("explorer \"%s\"" % output_dir)
    print("Plotting training curves...")
    plotting.plot_training_curves(
        result_list,
        dir_name=output_dir,
        e_lims=args.error_lims,
    )
    if args.plot_test_set_improvement_probability or args.dynamic_terminator:
        attribute_list = [
            columns.TrainError,
            columns.TestError,
            columns.StepSize,
        ]
        if args.plot_test_set_improvement_probability:
            print("Plotting test set improvement probability...")
            attribute_list.append(columns.TestSetImprovementProbabilitySimple)
        if args.dynamic_terminator:
            print("Plotting batch improvement probability...")
            attribute_list.append(columns.BatchImprovementProbability)
        plotting.plot_result_attributes_subplots(
            plot_name="Improvement probability\n%s" % param_str,
            dir_name=output_dir,
            result_list=result_list,
            attribute_list=attribute_list,
            log_axes_attributes=[columns.StepSize],
            iqr_axis_scaling=True,
        )
    for i, model in enumerate(model_list):
        output_dir_repeat = os.path.join(output_dir, "Repeat %i" % (i + 1))
        if args.plot_preds:
            print("Plotting final predictions...")
            plotting.plot_data_predictions(
                plot_name="Final predictions",
                dir_name=output_dir_repeat,
                dataset=dataset,
                output_dim=args.output_dim,
                model=model,
            )
        if args.plot_pred_gif:
            print("Plotting gif of predictions during training...")
            plotting.plot_predictions_gif(
                plot_name="Model predictions during training",
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
        if args.plot_hidden_gif:
            print("Plotting gif of hidden layers during training...")
            if args.plot_hidden_preactivations_gif:
                plot_name = "Hidden layer preactivations during training"
            else:
                plot_name = "Hidden layer outputs during training"
            plotting.plot_hidden_outputs_gif(
                plot_name=plot_name,
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
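# A minimal sketch (not taken from the repository) of how the main function
# above could be driven directly from Python with an argparse-style Namespace,
# bypassing the command-line interface. All values below are illustrative
# assumptions; the dt_* attributes are omitted because they are only read when
# dynamic_terminator is True.
from argparse import Namespace

example_args = Namespace(
    seed=0,
    dataset_type=data.Sinusoidal,
    input_dim=2,
    output_dim=1,
    n_train=100,
    batch_size=50,
    t_lim=5.0,
    t_eval=0.1,
    num_hidden_units=[10],
    n_repeats=1,
    line_search=optimisers.LineSearch(),
    dynamic_terminator=False,
    plot_preds=False,
    plot_pred_gif=False,
    plot_hidden_gif=False,
    plot_hidden_preactivations_gif=False,
    plot_test_set_improvement_probability=False,
    error_lims=None,
)
main(example_args)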
def main(args):
    np.random.seed(args.seed)

    # Initialise network model
    network = models.NeuralNetwork(
        input_dim=INPUT_DIM,
        output_dim=OUTPUT_DIM,
        num_hidden_units=args.num_hidden_units,
    )

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "mnist" if args.use_mnist_data else "synthetic",
        "r%s" % args.regulariser,
        "e%s" % args.error_scale_coefficient,
        "u%s" % args.num_hidden_units,
    ])
    if args.use_mnist_data:
        param_str += " " + " ".join([
            "t%s" % args.mnist_num_train_tasks,
            "l%s" % args.mnist_train_distribution_label,
            "o%s" % args.mnist_out_of_distribution_label,
        ])
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train Dinosaur",
        param_str,
    )
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    print("Saving output plots in \"%s\"" % output_dir)
    os.system("explorer \"%s\"" % output_dir)

    # Initialise data
    if args.use_mnist_data:
        task_set, out_of_distribution_task = get_mnist_data(args)
    else:
        task_set, out_of_distribution_task = get_synthetic_data()

    # Initialise meta-learning model
    regulariser_type = models.dinosaur.regularisers.regulariser_names_dict[
        args.regulariser]
    regulariser = regulariser_type(
        error_scale_coefficient=args.error_scale_coefficient,
    )
    dinosaur = models.Dinosaur(
        network=network,
        regulariser=regulariser,
        primary_initialisation_task=task_set.task_list[0],
        secondary_initialisation_task=task_set.task_list[1],
    )

    for _ in range(10):
        # Perform one outer-loop iteration of meta-learning
        dinosaur._result.display_headers()
        dinosaur.meta_learn(
            task_set,
            terminator=optimisers.Terminator(i_lim=1),
        )
        # Check that the mean and scale are converging to sensible values
        print(regulariser.mean)
        print(regulariser.parameter_scale)
        print(regulariser.error_scale)
        # Compare adapting to an out-of-distribution task
        dinosaur.fast_adapt(out_of_distribution_task)

    # Plot training curves
    plotting.plot_training_curves([dinosaur._result], dir_name=output_dir)

    # Plot task predictions after meta-learning
    for i, task in enumerate(task_set.task_list):
        print("Plotting adaptations to task %i" % i)
        dinosaur.fast_adapt(task)
        plotting.plot_2D_regression(
            "Dinosaur task %i" % i,
            output_dir,
            task,
            OUTPUT_DIM,
            model=network,
        )
        plot_hidden_activations(
            task,
            network,
            "Hidden activations for task %i" % i,
            output_dir,
        )

    # Plot adaptation to out of distribution task
    print("Plotting adaptation to out of distribution task")
    dinosaur.fast_adapt(out_of_distribution_task)
    plotting.plot_2D_regression(
        "Dinosaur predictions for out-of-distribution task",
        output_dir,
        out_of_distribution_task,
        OUTPUT_DIM,
        model=network,
    )
    plot_hidden_activations(
        out_of_distribution_task,
        network,
        "Hidden activations for out-of-distribution task",
        output_dir,
    )

    # Plot adaptation to out of distribution task without regularisation
    print("Plotting adaptation without regularisation")
    if isinstance(regulariser, models.dinosaur.regularisers.Eve):
        ls = optimisers.LineSearch()
        dinosaur._optimiser = optimisers.GradientDescent(ls)
    else:
        network._regulariser.error_scale = 0
    network.set_parameter_vector(regulariser.mean)
    dinosaur.fast_adapt(out_of_distribution_task)
    plotting.plot_2D_regression(
        "Dinosaur predictions for out-of-distribution task without "
        "regularisation",
        output_dir,
        out_of_distribution_task,
        OUTPUT_DIM,
        model=network,
    )
    plot_hidden_activations(
        out_of_distribution_task,
        network,
        "Hidden activations for out-of-distribution task without "
        "regularisation",
        output_dir,
    )
# Call gradient descent function
result = optimisers.gradient_descent(
    model,
    sin_data,
    terminator=optimisers.Terminator(t_lim=t_lim),
    evaluator=optimisers.Evaluator(t_interval=t_interval),
    result=optimisers.Result(name="SGD with line search", verbose=True),
    line_search=optimisers.LineSearch(),
    batch_getter=optimisers.batch.ConstantBatchSize(50),
)

# Plot predictions
x_pred_0 = x_pred_1 = np.linspace(x_lo, x_hi)
plotting.plot_2D_nD_regression(
    "Gradient descent predictions for 2D-%iD sinusoid" % output_dim,
    output_dir,
    n_output_dims=output_dim,
    dataset=sin_data,
    x_pred_0=x_pred_0,
    x_pred_1=x_pred_1,
    model=model,
)

# Plot learning curve
plotting.plot_training_curves(
    [result],
    "Gradient descent learning curves for 2D-%iD sinusoid" % output_dim,
    output_dir,
    e_lims=[0, 0.5],
)
    line_search=optimisers.LineSearch(),
    batch_getter=optimisers.batch.ConstantBatchSize(int(batch_size)))
results_list.append(result)

# Try again with full training set
n.set_parameter_vector(w0)
result = optimisers.gradient_descent(
    n,
    sin_data,
    terminator=optimisers.Terminator(t_lim=t_lim),
    evaluator=optimisers.Evaluator(t_interval=t_interval),
    line_search=optimisers.LineSearch(),
    batch_getter=optimisers.batch.FullTrainingSet(),
    result=optimisers.Result(name="Full training set", verbose=True),
)
results_list.append(result)

# Get name of output directory
current_dir = os.path.dirname(os.path.abspath(__file__))
output_dir = os.path.join(current_dir, "Outputs")

# Compare training curves
plotting.plot_training_curves(
    results_list,
    "Comparing batch sizes for gradient descent on 2D sinusoidal data",
    output_dir,
    e_lims=[0, 1.5],
    tp=0.5)

print("Script run in {:.3f} s".format(perf_counter() - t_0))
for seed in [2295, 6997, 7681]:
    # Set the random seed
    np.random.seed(seed)
    # Generate random network and store initial parameters
    n = NeuralNetwork(
        input_dim=2,
        output_dim=output_dim,
        num_hidden_units=[20, 20],
        act_funcs=[activations.Cauchy(), activations.Identity()])
    # Call gradient descent function
    result = optimisers.gradient_descent(
        n,
        sin_data,
        terminator=optimisers.Terminator(t_lim=t_lim),
        evaluator=optimisers.Evaluator(t_interval=t_interval),
        result=optimisers.Result(name="SGD with line search", verbose=True),
        line_search=optimisers.LineSearch())
    results_list.append(result)

# Get name of output directory
current_dir = os.path.dirname(os.path.abspath(__file__))
output_dir = os.path.join(current_dir, "Outputs")

# Compare training curves
plotting.plot_training_curves(
    results_list,
    "Training curve for gradient descent on 2D sinusoidal data",
    output_dir,
    e_lims=[0, 4])
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - n_train: positive integer number of points in the training set
    - batch_size: positive integer batch size to use for training
    - t_lim: positive float, length of time to train for each experiment
    - num_hidden_units: list of positive integers, number of hidden units in
      each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    - e_lims: list of 2 floats, used as axis limits in the output plots
    - n_repeats: positive integer number of repeats to perform of each
      experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network and store initial parameters
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ])
        w0 = n.get_parameter_vector().copy()

        # Call gradient descent function
        result_gd_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="SGD with line search", verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_ls)

        # Try again without line search
        n.set_parameter_vector(w0)
        result_gd_no_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="SGD without line search",
                verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_no_ls)

        # Call generalised Newton function
        n.set_parameter_vector(w0)
        result_pbgn_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="PBGN with line search", verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_ls)

        # Try again without line search
        n.set_parameter_vector(w0)
        result_pbgn_no_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="PBGN without line search",
                verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_no_ls)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plot_name = "Comparing gradient descent vs generalised Newton"
    plot_name += ", %iD-%iD data" % (input_dim, output_dim)
    plot_name += ", %.2g s training time" % t_lim
    plot_name += ", %s hidden units" % str(num_hidden_units)
    plotting.plot_training_curves(
        results_list,
        plot_name,
        output_dir,
        e_lims=e_lims)
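# Example invocation of the main function above. The argument values are
# illustrative assumptions chosen to match the docstring, not values taken
# from the repository's argparse defaults.
main(
    input_dim=1,
    output_dim=1,
    n_train=100,
    batch_size=50,
    t_lim=5.0,
    num_hidden_units=[20, 20],
    e_lims=[0.0, 0.5],
    n_repeats=3,
)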
def main(input_dim, output_dim, n_train, t_lim, num_hidden_units, e_lims,
        n_repeats, alpha_smooth, p_c, min_batch_size):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - n_train: positive integer number of points in the training set
    - t_lim: positive float, length of time to train for each experiment
    - num_hidden_units: list of positive integers, number of hidden units in
      each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    - e_lims: list of 2 floats, used as axis limits in the output plots
    - n_repeats: positive integer number of repeats to perform of each
      experiment
    - alpha_smooth: float in (0, 1), amount of smoothing to apply to DBS batch
      size
    - p_c: float in (0, 1), passed to the DBS batch getter as
      prob_correct_direction
    - min_batch_size: positive integer, minimum batch size allowed by DBS
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network
        model = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ])

        # Call gradient descent function
        result = optimisers.Result("Repeat = %i" % i)
        batch_getter = optimisers.batch.DynamicBatchSize(
            model,
            sin_data,
            alpha_smooth=alpha_smooth,
            prob_correct_direction=p_c,
            min_batch_size=min_batch_size)
        batch_col = optimisers.results.columns.BatchSize(batch_getter)
        dbs_col = optimisers.results.columns.DbsMetric()
        result.add_column(batch_col)
        result.add_column(dbs_col)
        result = optimisers.gradient_descent(
            model,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=result,
            line_search=optimisers.LineSearch(),
            batch_getter=batch_getter)
        results_list.append(result)

    # Compare training curves
    plot_name_suffix = "\n%iD-%iD data" % (input_dim, output_dim)
    plot_name_suffix += ", %.2g s training time" % t_lim
    plot_name_suffix += ", %s hidden units" % str(num_hidden_units)
    plot_name_suffix += "\nalpha_smooth = %.3f" % alpha_smooth
    plot_name_suffix += ", p_c = %.3f" % p_c
    plot_name_suffix += ", min_batch_size = %.3f" % min_batch_size
    this_test_output_dir = os.path.join(
        output_dir,
        plot_name_suffix.replace("\n", ""))
    plotting.plot_training_curves(
        results_list,
        "DBS learning curves" + plot_name_suffix,
        this_test_output_dir,
        e_lims=e_lims)
    for col in [dbs_col, batch_col]:
        plot_name = "%s against iteration for dynamic batch size" % col.name
        plot_name += plot_name_suffix
        plotting.plot_result_attribute(
            plot_name,
            this_test_output_dir,
            results_list,
            type(col))