def test_ConstantParameterStatistics(seed):
    """
    Test the models.initialisers.ConstantParameterStatistics class, which
    initialises a model with constant statistics for the weights and biases
    in each layer.

    TODO: test with 0, 1, 2 hidden layers
    """
    np.random.seed(seed)
    input_dim = np.random.randint(2, 10)
    output_dim = np.random.randint(2, 10)
    N_D = np.random.randint(100, 200)
    x_lo = np.random.uniform(-10, 0)
    x_hi = np.random.uniform(0, 10)
    sin_data = data.Sinusoidal(input_dim, output_dim, N_D, 0, x_lo, x_hi)
    initialiser = models.initialisers.ConstantParameterStatistics()
    num_hidden_layers = np.random.randint(3, 6)
    num_hidden_units = np.random.randint(3, 6, num_hidden_layers)
    nn = models.NeuralNetwork(
        input_dim,
        output_dim,
        num_hidden_units,
        initialiser=initialiser,
    )
    assert nn.forward_prop(sin_data.train.x).shape == sin_data.train.y.shape
    output_fname = "test_ConstantParameterStatistics, seed=%i.txt" % seed
    _print_pre_activation_statistics(nn, output_fname)
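# This test is presumably parametrised over seeds by the test runner; a
# direct-call sketch with an illustrative seed value:
#
#   test_ConstantParameterStatistics(seed=0)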
def get_random_network(
    low=3,
    high=6,
    act_funcs=None,
    error_func=None,
    input_dim=None,
    output_dim=None,
    initialiser=None,
):
    """
    Generate a neural network with a random number of inputs, outputs, hidden
    layers, and number of units in each hidden layer
    """
    if input_dim is None:
        input_dim = np.random.randint(low, high)
    if output_dim is None:
        output_dim = np.random.randint(low, high)
    num_hidden_layers = np.random.randint(low, high)
    num_hidden_units = np.random.randint(low, high, num_hidden_layers)
    n = models.NeuralNetwork(
        input_dim=input_dim,
        output_dim=output_dim,
        num_hidden_units=num_hidden_units,
        act_funcs=act_funcs,
        error_func=error_func,
        initialiser=initialiser,
    )
    return n
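# A minimal usage sketch for get_random_network; the specific bounds and
# activation functions are illustrative choices (both activations appear
# elsewhere in this repo):
#
#   n = get_random_network(
#       low=2,
#       high=5,
#       act_funcs=[models.activations.gaussian, models.activations.identity],
#   )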
def run_experiment(
    num_units,
    num_layers,
    log10_s0,
    alpha,
    beta,
    act_func,
    max_steps,
    batch_size,
    batch_replace,
    plot_preds=False,
):
    # Initialise network and batch getter
    model = models.NeuralNetwork(
        input_dim=args.input_dim,
        output_dim=args.output_dim,
        num_hidden_units=[num_units for _ in range(num_layers)],
        act_funcs=[act_func, models.activations.identity],
    )
    if (batch_size is None) or (batch_size >= args.n_train):
        batch_getter = optimisers.batch.FullTrainingSet()
    else:
        batch_getter = optimisers.batch.ConstantBatchSize(
            batch_size,
            batch_replace,
        )

    # Perform gradient descent
    result = optimisers.gradient_descent(
        model,
        sin_data,
        terminator=optimisers.Terminator(t_lim=args.t_lim),
        evaluator=optimisers.Evaluator(t_interval=args.t_eval),
        line_search=optimisers.LineSearch(
            s0=pow(10, log10_s0),
            alpha=alpha,
            beta=beta,
            max_its=max_steps,
        ),
        batch_getter=batch_getter,
    )

    # If specified, plot the final model predictions
    if plot_preds:
        print("Plotting final predictions...")
        plotting.plot_data_predictions(
            plot_name="Final predictions",
            dir_name=output_dir,
            dataset=sin_data,
            output_dim=args.output_dim,
            model=model,
        )

    # Return the final test error
    TestError = optimisers.results.columns.TestError
    final_test_error = result.get_values(TestError)[-1]
    return final_test_error
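# A hypothetical call to run_experiment; the hyperparameter values below are
# illustrative, and args, sin_data and output_dir are assumed to be defined
# at module level, as in the function body above:
#
#   final_test_error = run_experiment(
#       num_units=10,
#       num_layers=2,
#       log10_s0=0.0,
#       alpha=0.5,
#       beta=0.5,
#       act_func=models.activations.gaussian,
#       max_steps=10,
#       batch_size=50,
#       batch_replace=True,
#   )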
def warmup(n_its=1000):
    """
    Perform a warmup routine; useful to call in scripts before timing an
    optimiser, because the process often appears to run slowly at first
    (presumably due to its initial priority)
    """
    sin_data = data.Sinusoidal(1, 1, freq=1)
    n = models.NeuralNetwork(1, 1, [20])
    gradient_descent(
        n,
        sin_data,
        terminator=Terminator(i_lim=n_its),
        evaluator=Evaluator(i_interval=n_its // 10),
        result=Result(name="Warmup", verbose=True),
        line_search=None,
    )
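# Typical usage at the top of a timing script (matching the calls seen in the
# main functions elsewhere in this repo), so that subsequent optimiser
# timings are less skewed by the process initially running slowly:
#
#   optimisers.warmup()
#   # ... now run and time the optimisers of interest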
def test_plot_result_attribute_subplots():
    """
    Test plotting function for plotting the values in multiple columns of a
    Result object over time, with one subplot per column
    """
    np.random.seed(1521)
    n_its = np.random.randint(10, 20)
    n_train = np.random.randint(10, 20)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, n_train=n_train)
    results_list = []
    for i in range(5):
        model = models.NeuralNetwork(input_dim=1, output_dim=1)
        model.get_gradient_vector(sin_data.train.x, sin_data.train.y)
        name = "test_plot_result_attribute_subplots_%i" % (i + 1)
        output_text_filename = os.path.join(output_dir, name + ".txt")
        with open(output_text_filename, "w") as f:
            result = optimisers.Result(name=name, file=f)
            ls = optimisers.LineSearch()
            ls_column = optimisers.results.columns.StepSize(ls)
            dbs_metric_column = optimisers.results.columns.DbsMetric()
            result.add_column(ls_column)
            result.add_column(dbs_metric_column)
            optimisers.gradient_descent(
                model,
                sin_data,
                result=result,
                line_search=ls,
                terminator=optimisers.Terminator(i_lim=n_its),
                evaluator=optimisers.Evaluator(i_interval=1),
            )
        results_list.append(result)

    attribute_list = [
        optimisers.results.columns.TrainError,
        optimisers.results.columns.TestError,
        optimisers.results.columns.Time,
        type(ls_column),
        type(dbs_metric_column),
    ]
    plotting.plot_result_attributes_subplots(
        "test_plot_result_attribute_subplots",
        output_dir,
        results_list,
        attribute_list,
        marker="o",
        line_style="--",
        log_axes_attributes={
            optimisers.results.columns.TrainError,
            optimisers.results.columns.TestError,
            type(ls_column),
        },
    )
def color_right_half(recolored_lh_arr, gray_lh_arr, gray_rh_arr, patch_dim,
        number_of_colors, centroids_arr, assigned_clusters_arr, arch, alpha,
        noi, method, bs):
    """
    Train a neural network to map grayscale patches from the left half of an
    image to colour-cluster labels, then use the trained network to recolour
    the right half, one patch centre at a time. (Note: recolored_lh_arr and
    bs are currently unused.)
    """
    X = create_X(gray_lh_arr, patch_dim)
    Y = create_Y(assigned_clusters_arr, number_of_colors, patch_dim)
    X, mean_arr, max_arr, min_arr = scale_features(X)

    nn = models.NeuralNetwork()
    nn.set_architecture(arch)
    # Sigmoid activations in the hidden layers and the output layer
    nn.set_activations(['sigm' for _ in range(len(arch) - 2)] + ['sigm'])
    nn.train(X, Y, alpha, noi, method)

    # Shape of the recolored right half: one RGB triple per grayscale pixel
    recolored_rh_arr = np.zeros(gray_rh_arr.shape + (3,))
    for r in range(0, gray_rh_arr.shape[0] - patch_dim + 1):
        for c in range(0, gray_rh_arr.shape[1] - patch_dim + 1):
            gray_rh_patch = gray_rh_arr[r:r + patch_dim, c:c + patch_dim]
            patch_flat = gray_rh_patch.reshape(1, patch_dim**2)
            # Scale the patch with the statistics from the training data
            patch_flat = (patch_flat - mean_arr) / (max_arr - min_arr)
            # Colour the centre pixel of the patch with the centroid of the
            # most probable colour cluster
            y_hat = nn.forward_prop(patch_flat)[-1]
            index = np.argmax(y_hat)
            recolored_rh_arr[r + patch_dim // 2, c + patch_dim // 2, :] = (
                centroids_arr[index])

    return recolored_rh_arr
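# Worked shape example (values illustrative): with gray_rh_arr of shape
# (100, 100) and patch_dim = 5, recolored_rh_arr has shape (100, 100, 3), and
# the loops assign colours to the centre pixels with row/column indices
# 2..97, leaving an uncoloured border of patch_dim // 2 = 2 pixels on each
# side of the image.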
# Set time limit for training and evaluation frequency
t_lim = 5
t_interval = t_lim / 10

# Get name of output directory
current_dir = os.path.dirname(os.path.abspath(__file__))
output_dir = os.path.join(current_dir, "Outputs")

# Set the random seed
np.random.seed(2865)

# Generate random network and data
n = models.NeuralNetwork(
    input_dim=1,
    output_dim=1,
    num_hidden_units=[10],
    act_funcs=[models.activations.cauchy, models.activations.identity],
)
sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)

# Call gradient descent function
result = optimisers.gradient_descent(
    n,
    sin_data,
    terminator=optimisers.Terminator(t_lim=t_lim),
    evaluator=optimisers.Evaluator(t_interval=t_interval),
    result=optimisers.Result(name="SGD with line search", verbose=True),
    line_search=optimisers.LineSearch(),
)

# Plot predictions
x_pred = np.linspace(-2, 2, 200).reshape(1, -1)
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for this script, wrapped by argparse for command-line
    arguments.
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, time limit, and results list
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    t_interval = t_lim / 50
    results_list = []

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.cauchy,
                models.activations.identity,
            ],
            initialiser=models.initialisers.ConstantPreActivationStatistics(
                sin_data.x_train,
                sin_data.y_train,
            ),
        )
        # Set name for experiment
        name = "Constant pre-activation statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result)

        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.cauchy,
                models.activations.identity,
            ],
            initialiser=models.initialisers.ConstantParameterStatistics(),
        )
        # Set name for experiment
        name = "Constant parameter statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plotting.plot_training_curves(
        results_list,
        "Comparing initialisers for gradient descent on 2D sinusoidal data",
        output_dir,
        e_lims=e_lims,
        tp=0.5,
    )
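# A hypothetical invocation with small illustrative argument values (in the
# original script these presumably come from argparse):
#
#   main(
#       input_dim=2,
#       output_dim=3,
#       n_train=100,
#       batch_size=50,
#       t_lim=5,
#       num_hidden_units=[10],
#       e_lims=[0, 0.5],
#       n_repeats=3,
#   )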
def main(args):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - args: object containing modified command line arguments as attributes
    """
    np.random.seed(args.seed)

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "d%s" % args.dataset_type.__name__[:3],
        "i%s" % args.input_dim,
        "o%s" % args.output_dim,
        "t%s" % args.t_lim,
        "n%s" % args.n_train,
        "b%s" % args.batch_size,
        "u%s" % args.num_hidden_units,
    ])
    if args.dynamic_terminator:
        dt_str = "dyn"
        if args.dt_smooth_output:
            dt_str += "sOut"
        if args.dt_smooth_mrse:
            dt_str += "sMrStd"
        param_str += " %s%i" % (dt_str, args.dt_buffer_length)
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train gradient descent",
        param_str,
    )
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Perform warmup experiment
    optimisers.warmup()

    # Initialise lists of objects that will be stored for each repeat
    result_list = []
    model_list = []
    prediction_column_list = []

    # Initialise dataset object and corresponding error function
    dataset_kwargs = {
        "input_dim": args.input_dim,
        "n_train": args.n_train,
    }
    if not issubclass(args.dataset_type, data.BinaryClassification):
        dataset_kwargs["output_dim"] = args.output_dim
    dataset = args.dataset_type(**dataset_kwargs)
    if isinstance(dataset, data.Regression):
        error_func = models.errors.sum_of_squares
        act_funcs = None
        print("Using regression data set with sum of squares error function")
    elif isinstance(dataset, data.BinaryClassification):
        error_func = models.errors.binary_cross_entropy
        act_funcs = [models.activations.gaussian, models.activations.logistic]
        print("Using binary classification data set with binary cross-entropy "
            "error function, and logistic activation function in the output "
            "layer")
    elif isinstance(dataset, data.Classification):
        error_func = models.errors.softmax_cross_entropy
        act_funcs = None
        print("Using classification data set with softmax cross entropy error "
            "function")
    else:
        raise ValueError(
            "Data set must be either a binary-classification, multi-class "
            "classification or regression data set")

    # Iterate through repeats
    for i in range(args.n_repeats):
        # Initialise model and Result object
        model = models.NeuralNetwork(
            input_dim=args.input_dim,
            output_dim=args.output_dim,
            num_hidden_units=args.num_hidden_units,
            error_func=error_func,
            act_funcs=act_funcs,
        )
        result = optimisers.Result(name="Repeat %i" % (i + 1))
        if args.line_search is not None:
            args.line_search_col = columns.StepSize(args.line_search)
            result.add_column(args.line_search_col)
        if args.plot_pred_gif or args.plot_hidden_gif:
            pred_column = columns.Predictions(
                dataset=dataset,
                store_hidden_layer_outputs=args.plot_hidden_gif,
                store_hidden_layer_preactivations=(
                    args.plot_hidden_preactivations_gif),
            )
            result.add_column(pred_column)
        if args.plot_test_set_improvement_probability:
            test_set_improvement_column = (
                columns.TestSetImprovementProbabilitySimple(
                    model,
                    dataset,
                    smoother=optimisers.smooth.MovingAverage(1, n=10),
                ))
            result.add_column(test_set_improvement_column)
        if args.dynamic_terminator:
            dynamic_terminator = optimisers.DynamicTerminator(
                model=model,
                dataset=dataset,
                batch_size=args.batch_size,
                replace=False,
                t_lim=args.t_lim,
                smooth_n=args.dt_buffer_length,
                smooth_x0=args.dt_x0,
                smooth_output=args.dt_smooth_output,
                smooth_mean_reduction=args.dt_smooth_mrse,
                smooth_std=args.dt_smooth_mrse,
            )
            terminator = dynamic_terminator
            batch_getter = dynamic_terminator
            dynamic_terminator_column = columns.BatchImprovementProbability(
                dynamic_terminator,
            )
            result.add_column(dynamic_terminator_column)
        else:
            terminator = optimisers.Terminator(t_lim=args.t_lim)
            batch_getter = optimisers.batch.ConstantBatchSize(
                args.batch_size,
                True,
            )

        # Perform gradient descent
        optimisers.gradient_descent(
            model,
            dataset,
            line_search=args.line_search,
            result=result,
            evaluator=optimisers.Evaluator(t_interval=args.t_eval),
            terminator=terminator,
            batch_getter=batch_getter,
        )

        # Store results
        result_list.append(result)
        model_list.append(model)
        if args.plot_pred_gif or args.plot_hidden_gif:
            prediction_column_list.append(pred_column)

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    os.system("explorer \"%s\"" % output_dir)
    print("Plotting training curves...")
    plotting.plot_training_curves(
        result_list,
        dir_name=output_dir,
        e_lims=args.error_lims,
    )
    if args.plot_test_set_improvement_probability or args.dynamic_terminator:
        attribute_list = [
            columns.TrainError,
            columns.TestError,
            columns.StepSize,
        ]
        if args.plot_test_set_improvement_probability:
            print("Plotting test set improvement probability...")
            attribute_list.append(columns.TestSetImprovementProbabilitySimple)
        if args.dynamic_terminator:
            print("Plotting batch improvement probability...")
            attribute_list.append(columns.BatchImprovementProbability)
        plotting.plot_result_attributes_subplots(
            plot_name="Improvement probability\n%s" % param_str,
            dir_name=output_dir,
            result_list=result_list,
            attribute_list=attribute_list,
            log_axes_attributes=[columns.StepSize],
            iqr_axis_scaling=True,
        )
    for i, model in enumerate(model_list):
        output_dir_repeat = os.path.join(output_dir, "Repeat %i" % (i + 1))
        if args.plot_preds:
            print("Plotting final predictions...")
            plotting.plot_data_predictions(
                plot_name="Final predictions",
                dir_name=output_dir_repeat,
                dataset=dataset,
                output_dim=args.output_dim,
                model=model,
            )
        if args.plot_pred_gif:
            print("Plotting gif of predictions during training...")
            plotting.plot_predictions_gif(
                plot_name="Model predictions during training",
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
        if args.plot_hidden_gif:
            print("Plotting gif of hidden layers during training...")
            if args.plot_hidden_preactivations_gif:
                plot_name = "Hidden layer preactivations during training"
            else:
                plot_name = "Hidden layer outputs during training"
            plotting.plot_hidden_outputs_gif(
                plot_name=plot_name,
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
def main(args):
    np.random.seed(args.seed)

    # Initialise network model
    network = models.NeuralNetwork(
        input_dim=INPUT_DIM,
        output_dim=OUTPUT_DIM,
        num_hidden_units=args.num_hidden_units,
    )

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "mnist" if args.use_mnist_data else "synthetic",
        "r%s" % args.regulariser,
        "e%s" % args.error_scale_coefficient,
        "u%s" % args.num_hidden_units,
    ])
    if args.use_mnist_data:
        param_str += " " + " ".join([
            "t%s" % args.mnist_num_train_tasks,
            "l%s" % args.mnist_train_distribution_label,
            "o%s" % args.mnist_out_of_distribution_label,
        ])
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train Dinosaur",
        param_str,
    )
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
    print("Saving output plots in \"%s\"" % output_dir)
    os.system("explorer \"%s\"" % output_dir)

    # Initialise data
    if args.use_mnist_data:
        task_set, out_of_distribution_task = get_mnist_data(args)
    else:
        task_set, out_of_distribution_task = get_synthetic_data()

    # Initialise meta-learning model
    regulariser_type = models.dinosaur.regularisers.regulariser_names_dict[
        args.regulariser]
    regulariser = regulariser_type(
        error_scale_coefficient=args.error_scale_coefficient,
    )
    dinosaur = models.Dinosaur(
        network=network,
        regulariser=regulariser,
        primary_initialisation_task=task_set.task_list[0],
        secondary_initialisation_task=task_set.task_list[1],
    )

    for _ in range(10):
        # Perform one outer-loop iteration of meta-learning
        dinosaur._result.display_headers()
        dinosaur.meta_learn(
            task_set,
            terminator=optimisers.Terminator(i_lim=1),
        )
        # Check that the mean and scale are converging to sensible values
        print(regulariser.mean)
        print(regulariser.parameter_scale)
        print(regulariser.error_scale)
        # Compare adapting to an out-of-distribution task
        dinosaur.fast_adapt(out_of_distribution_task)

    # Plot training curves
    plotting.plot_training_curves([dinosaur._result], dir_name=output_dir)

    # Plot task predictions after meta-learning
    for i, task in enumerate(task_set.task_list):
        print("Plotting adaptations to task %i" % i)
        dinosaur.fast_adapt(task)
        plotting.plot_2D_regression(
            "Dinosaur task %i" % i,
            output_dir,
            task,
            OUTPUT_DIM,
            model=network,
        )
        plot_hidden_activations(
            task,
            network,
            "Hidden activations for task %i" % i,
            output_dir,
        )

    # Plot adaptation to out of distribution task
    print("Plotting adaptation to out of distribution task")
    dinosaur.fast_adapt(out_of_distribution_task)
    plotting.plot_2D_regression(
        "Dinosaur predictions for out-of-distribution task",
        output_dir,
        out_of_distribution_task,
        OUTPUT_DIM,
        model=network,
    )
    plot_hidden_activations(
        out_of_distribution_task,
        network,
        "Hidden activations for out-of-distribution task",
        output_dir,
    )

    # Plot adaptation to out of distribution task without regularisation
    print("Plotting adaptation without regularisation")
    if isinstance(regulariser, models.dinosaur.regularisers.Eve):
        ls = optimisers.LineSearch()
        dinosaur._optimiser = optimisers.GradientDescent(ls)
    else:
        network._regulariser.error_scale = 0
    network.set_parameter_vector(regulariser.mean)
    dinosaur.fast_adapt(out_of_distribution_task)
    plotting.plot_2D_regression(
        "Dinosaur predictions for out-of-distribution task without "
        "regularisation",
        output_dir,
        out_of_distribution_task,
        OUTPUT_DIM,
        model=network,
    )
    plot_hidden_activations(
        out_of_distribution_task,
        network,
        "Hidden activations for out-of-distribution task without "
        "regularisation",
        output_dir,
    )
def main(
    input_dim,
    output_dim,
    n_train,
    num_hidden_units,
    n_repeats,
    n_iters,
    n_plots,
    n_batch_sizes,
    min_batch_size,
    ylims,
    seed,
    batch_size_optimise,
    use_replacement,
    gif_duration,
):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - n_train: positive integer number of points in the training set
    - num_hidden_units: list of positive integers, number of hidden units in
      each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    - n_repeats: positive integer number of repeats to perform of each batch
      size test
    - n_iters: total number of iterations to perform
    - n_plots: number of frames of the gif (equal to how many times
      optimisation will pause in order to sweep over the list of batch sizes)
    - n_batch_sizes: the number of different batch sizes to test for each
      iteration
    - min_batch_size: the smallest batch size to test
    - ylims: limits for the y-axes of each subplot of the output gif. Should
      be None, in which case the axis limits are calculated automatically, or
      an iterable containing 4 floats, in which the first 2 are the lower and
      upper axis limits for the left subplot, and the second 2 are the lower
      and upper axis limits for the right subplot
    - seed: random seed to use for the experiment
    - batch_size_optimise: batch size to use for standard optimisation
      iterations (IE not when sweeping over batch sizes). If omitted, then
      the full training set is used as a batch during optimisation iterations
    - use_replacement: if True, then use replacement when sampling batches
      from the training set
    - gif_duration: time in seconds that the output gif should last for in
      total
    """
    np.random.seed(seed)
    n_iters_per_plot = int(n_iters / n_plots)

    # Initialise model and dataset
    model = models.NeuralNetwork(input_dim, output_dim, num_hidden_units)
    freq = 1 if (input_dim == 1) else None
    sin_data = data.Sinusoidal(input_dim, output_dim, n_train, freq=freq)

    # Initialise objects for optimisation
    result = optimisers.Result()
    evaluator = optimisers.Evaluator(i_interval=n_iters_per_plot)
    terminator = optimisers.Terminator(i_lim=n_iters)
    if batch_size_optimise is None:
        batch_getter = optimisers.batch.FullTrainingSet()
    else:
        batch_getter = optimisers.batch.ConstantBatchSize(
            batch_size_optimise,
            use_replacement,
        )
    line_search = optimisers.LineSearch()

    # Initialise OptimalBatchSize column and add to the result object
    gd_optimiser = optimisers.GradientDescent(line_search)
    optimal_batch_size_col = optimisers.results.columns.OptimalBatchSize(
        gd_optimiser,
        sin_data.n_train,
        n_repeats=n_repeats,
        n_batch_sizes=n_batch_sizes,
    )
    result.add_column(optimal_batch_size_col)

    # Get output directory which is specific to the script parameters
    param_str = ", ".join([
        "input_dim = %i" % input_dim,
        "output_dim = %i" % output_dim,
        "n_train = %i" % n_train,
        "n_iters = %i" % n_iters,
        "batch_size_optimise = %r" % batch_size_optimise,
        "use_replacement = %r" % use_replacement,
        "ylims = %r" % ylims,
        "n_plots = %i" % n_plots,
    ])
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Error vs batch",
        param_str,
    )

    # Call optimisation function
    gd_optimiser.optimise(
        model,
        sin_data,
        result=result,
        batch_getter=batch_getter,
        terminator=terminator,
        evaluator=evaluator,
    )

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    frame_duration_ms = 1000 * gif_duration / n_plots
    if ylims is None:
        y_lim_left = None
        y_lim_right = None
    else:
        y_lim_left = ylims[:2]
        y_lim_right = ylims[2:]
    plotting.plot_error_reductions_vs_batch_size_gif(
        result,
        optimal_batch_size_col,
        output_dir,
        y_lim_left=y_lim_left,
        y_lim_right=y_lim_right,
        duration=frame_duration_ms,
        loop=None,
    )
    plotting.plot_optimal_batch_sizes(
        "Optimal batch size",
        output_dir,
        result,
        optimal_batch_size_col,
    )
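# A hypothetical invocation of main; all argument values below are
# illustrative (the original script presumably supplies them via argparse):
#
#   main(
#       input_dim=1, output_dim=1, n_train=100, num_hidden_units=[10],
#       n_repeats=100, n_iters=10000, n_plots=20, n_batch_sizes=30,
#       min_batch_size=5, ylims=None, seed=0, batch_size_optimise=None,
#       use_replacement=True, gif_duration=20,
#   )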
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
        e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - n_train: positive integer number of points in the training set
    - batch_size: positive integer batch size to use for training
    - t_lim: positive float, length of time to train for each experiment
    - num_hidden_units: list of positive integers, number of hidden units in
      each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    - e_lims: list of 2 floats, used as axis limits in the output plots
    - n_repeats: positive integer number of repeats to perform of each
      experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network and store initial parameters
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ],
        )
        w0 = n.get_parameter_vector().copy()

        # Call gradient descent function
        result_gd_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="SGD with line search",
                verbose=True,
            ),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result_gd_ls)

        # Try again without line search
        n.set_parameter_vector(w0)
        result_gd_no_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="SGD without line search",
                verbose=True,
            ),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result_gd_no_ls)

        # Call generalised Newton function
        n.set_parameter_vector(w0)
        result_pbgn_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="PBGN with line search",
                verbose=True,
            ),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result_pbgn_ls)

        # Try again without line search
        n.set_parameter_vector(w0)
        result_pbgn_no_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(
                name="PBGN without line search",
                verbose=True,
            ),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size),
        )
        results_list.append(result_pbgn_no_ls)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plot_name = "Comparing gradient descent vs generalised Newton"
    plot_name += ", %iD-%iD data" % (input_dim, output_dim)
    plot_name += ", %.2g s training time" % t_lim
    plot_name += ", %s hidden units" % str(num_hidden_units)
    plotting.plot_training_curves(
        results_list,
        plot_name,
        output_dir,
        e_lims=e_lims,
    )
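# The save/restore pattern above ensures that all four optimiser
# configurations start from identical initial parameters, so the training
# curves are directly comparable. A minimal sketch of the same pattern in
# isolation:
#
#   w0 = n.get_parameter_vector().copy()
#   result_a = optimisers.gradient_descent(n, sin_data, ...)
#   n.set_parameter_vector(w0)  # restore before the next run
#   result_b = optimisers.generalised_newton(n, sin_data, ...)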
    df_test_new_pred = gs_model.best_estimator_.predict(df_test_new)
    models.plot_confusion_matrix(df_test_new_target, df_test_new_pred, names)

    # Learning curve
    models.plot_learning_curve(
        gs_model.best_estimator_,
        "Decision Tree",
        df_test,
        df_test_target,
        cv=5,
        train_sizes=np.arange(2000, 5000, 1000),
    )
    print(metrics.classification_report(
        df_test_new_target,
        df_test_new_pred,
        target_names=names,
    ))

if model_name.upper() == "N":
    model = models.NeuralNetwork()
    if manual.upper() == "Y":
        params = {
            'hidden_layer_sizes': 3,
            'batch_size': 1000,
            'activation': 'relu',
            # 'class_weight': "balanced"
        }
        model.clf.set_params(**params)
        start_time = time.time()
        model.clf.fit(df_test, df_test_target)
def main(input_dim, output_dim, n_train, t_lim, num_hidden_units, e_lims,
        n_repeats, alpha_smooth, p_c, min_batch_size):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    - input_dim: positive integer number of input dimensions
    - output_dim: positive integer number of output dimensions
    - n_train: positive integer number of points in the training set
    - t_lim: positive float, length of time to train for each experiment
    - num_hidden_units: list of positive integers, number of hidden units in
      each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    - e_lims: list of 2 floats, used as axis limits in the output plots
    - n_repeats: positive integer number of repeats to perform of each
      experiment
    - alpha_smooth: float in (0, 1), amount of smoothing to apply to DBS
      batch size
    - p_c: float, passed to the DynamicBatchSize batch getter as
      prob_correct_direction
    - min_batch_size: the smallest batch size that the dynamic batch-size
      getter is allowed to use
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)

        # Generate random network
        model = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[
                models.activations.gaussian,
                models.activations.identity,
            ],
        )

        # Call gradient descent function
        result = optimisers.Result("Repeat = %i" % i)
        batch_getter = optimisers.batch.DynamicBatchSize(
            model,
            sin_data,
            alpha_smooth=alpha_smooth,
            prob_correct_direction=p_c,
            min_batch_size=min_batch_size,
        )
        batch_col = optimisers.results.columns.BatchSize(batch_getter)
        dbs_col = optimisers.results.columns.DbsMetric()
        result.add_column(batch_col)
        result.add_column(dbs_col)
        result = optimisers.gradient_descent(
            model,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=result,
            line_search=optimisers.LineSearch(),
            batch_getter=batch_getter,
        )
        results_list.append(result)

    # Compare training curves
    plot_name_suffix = "\n%iD-%iD data" % (input_dim, output_dim)
    plot_name_suffix += ", %.2g s training time" % t_lim
    plot_name_suffix += ", %s hidden units" % str(num_hidden_units)
    plot_name_suffix += "\nalpha_smooth = %.3f" % alpha_smooth
    plot_name_suffix += ", p_c = %.3f" % p_c
    plot_name_suffix += ", min_batch_size = %.3f" % min_batch_size
    this_test_output_dir = os.path.join(
        output_dir,
        plot_name_suffix.replace("\n", ""),
    )
    plotting.plot_training_curves(
        results_list,
        "DBS learning curves" + plot_name_suffix,
        this_test_output_dir,
        e_lims=e_lims,
    )
    for col in [dbs_col, batch_col]:
        plot_name = "%s against iteration for dynamic batch size" % col.name
        plot_name += plot_name_suffix
        plotting.plot_result_attribute(
            plot_name,
            this_test_output_dir,
            results_list,
            type(col),
        )
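# A hypothetical invocation with illustrative values; note that output_dir is
# assumed to be defined at module level, as in the function body above:
#
#   main(input_dim=1, output_dim=1, n_train=100, t_lim=5,
#       num_hidden_units=[10], e_lims=[0, 0.5], n_repeats=3,
#       alpha_smooth=0.3, p_c=0.99, min_batch_size=10)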