def test_pbgn(seed, reuse_block_inds):
    """
    Test the generalised Newton method for optimisation, using parallel
    block-diagonal approximations.

    TODO: combine this and the gradient descent test (and any optimisers
    implemented in future, e.g. Adam, PSO) into a single parametrised
    test (see the sketch after this function)
    """
    # Set the random seed
    np.random.seed(seed)
    # Generate random number of iterations, network, data, and results file
    n_iters = np.random.randint(10, 20)
    n = NeuralNetwork(input_dim=1,
                      output_dim=1,
                      num_hidden_units=[4, 8, 6],
                      act_funcs=[activations.gaussian, activations.identity])
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)
    name = "Test PBGN without line-search, reuse_block_inds={}".format(
        reuse_block_inds)
    results_filename = "{}.txt".format(name)
    # output_dir is assumed to be a module-level constant in the test file
    results_path = os.path.join(output_dir, results_filename)
    results_file = open(results_path, "w")
    result = optimisers.Result(name=name, verbose=True, file=results_file)
    # Call generalised Newton function
    result_pbgn = optimisers.generalised_newton(
        n,
        sin_data,
        terminator=optimisers.Terminator(i_lim=n_iters),
        evaluator=optimisers.Evaluator(i_interval=1),
        result=result,
        reuse_block_inds=reuse_block_inds)

    results_file.close()
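
# The TODO above asks for a single test parametrised over optimisers. A
# minimal sketch of one way to do that with pytest (hypothetical: it
# assumes gradient_descent accepts the same keyword arguments as
# generalised_newton, minus reuse_block_inds, and that the result
# argument defaults sensibly when omitted):
import pytest

@pytest.mark.parametrize("optimiser", [
    optimisers.gradient_descent,
    optimisers.generalised_newton,
])
def test_optimiser(optimiser):
    np.random.seed(0)
    n = NeuralNetwork(input_dim=1,
                      output_dim=1,
                      num_hidden_units=[4, 8, 6],
                      act_funcs=[activations.gaussian, activations.identity])
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)
    optimiser(n,
              sin_data,
              terminator=optimisers.Terminator(i_lim=5),
              evaluator=optimisers.Evaluator(i_interval=1))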
# Example 2
def make_smudge_plots(objective):
    smudge_plot(objective, lambda f, x: optimisers.gradient_descent(
        f, x, n_iters=1, line_search_flag=False, learning_rate=2e-1
    ), name="SGD smudge plot")
    smudge_plot(objective, lambda f, x: optimisers.gradient_descent(
        f, x, n_iters=1, line_search_flag=True, beta=0.99, alpha=0.2
    # ), name="SGD+LS smudge plot", n_lines=3)
    ), name="SGD+LS smudge plot", nx0_sm=6, nx1_sm=6, n_lines=2)
    smudge_plot(objective, lambda f, x: optimisers.generalised_newton(
        f, x, n_iters=1, line_search_flag=False, learning_rate=0
    ), name="GN smudge plot")
    smudge_plot(objective, lambda f, x: optimisers.rectified_newton(
        f, x, n_iters=1, line_search_flag=False, learning_rate=0
    ), name="RN smudge plot")
# Example 3
def run_experiment(dataset, num_units, num_layers, log10_learning_rate,
                   max_block_size, log10_max_step, reuse_block_inds, act_func):
    n = NeuralNetwork(input_dim=1,
                      output_dim=1,
                      num_hidden_units=[num_units for _ in range(num_layers)],
                      act_funcs=[act_func, activations.Identity()])
    result = optimisers.generalised_newton(
        n,
        dataset,
        learning_rate=pow(10, log10_learning_rate),
        max_block_size=max_block_size,
        max_step=pow(10, log10_max_step),
        terminator=optimisers.Terminator(t_lim=3),
        evaluator=optimisers.Evaluator(t_interval=0.1),
        line_search=None)
    return result
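
# Taking the learning rate and maximum step size as log10 values makes
# run_experiment convenient to drive from a random hyperparameter search.
# A minimal sketch (the sampling ranges are illustrative assumptions, and
# activations.Gaussian is assumed to exist alongside activations.Identity):
def random_search(dataset, n_samples=10):
    results = []
    for _ in range(n_samples):
        results.append(run_experiment(
            dataset,
            num_units=int(np.random.choice([5, 10, 20])),
            num_layers=int(np.random.choice([1, 2, 3])),
            log10_learning_rate=np.random.uniform(-4, -1),
            max_block_size=int(np.random.randint(2, 10)),
            log10_max_step=np.random.uniform(-1, 1),
            reuse_block_inds=True,
            act_func=activations.Gaussian(),
        ))
    return results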
# Example 4
def compare_optimisers_dimensional_efficiency():
    # np.unique removes duplicate dimensions produced by integer-casting
    # the log-spaced grid (np.int was removed in NumPy >= 1.24, so use int)
    n_dims_list = np.unique(np.logspace(0, 3, 10, dtype=int))
    optimiser_list = [
        lambda f, x, f_lim: optimisers.gradient_descent(f, x, f_lim=f_lim,
            line_search_flag=True, n_iters=np.inf, t_lim=5),
        lambda f, x, f_lim: optimisers.generalised_newton(f, x, f_lim=f_lim,
            line_search_flag=True, n_iters=np.inf, t_lim=5),
        lambda f, x, f_lim: optimisers.block_generalised_newton(f, x,
            f_lim=f_lim, line_search_flag=True, n_iters=np.inf, t_lim=5),
        lambda f, x, f_lim: optimisers.parallel_block_generalised_newton(f, x,
            f_lim=f_lim, line_search_flag=True, n_iters=np.inf, t_lim=5,
            block_size=3),
        lambda f, x, f_lim: optimisers.rectified_newton(f, x, f_lim=f_lim,
            line_search_flag=True, n_iters=np.inf, t_lim=5),
    ]
    plot_dimensional_efficiency(optimiser_list, objectives.Gaussian,
        n_dims_list, distance_ratio=3,
        name="Dimensional efficiency of different optimisers")
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
         e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   batch_size: positive integer batch size to use for training
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, e.g. [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network and store initial parameters
        n = models.NeuralNetwork(input_dim=input_dim,
                                 output_dim=output_dim,
                                 num_hidden_units=num_hidden_units,
                                 act_funcs=[
                                     models.activations.gaussian,
                                     models.activations.identity
                                 ])
        w0 = n.get_parameter_vector().copy()
        # Call gradient descent function
        result_gd_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="SGD with line search",
                                     verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_ls)
        # Try again without line search
        n.set_parameter_vector(w0)
        result_gd_no_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="SGD without line search",
                                     verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_no_ls)
        # Call generalised Newton function
        n.set_parameter_vector(w0)
        result_pbgn_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="PBGN with line search",
                                     verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_ls)
        # Try again without line search
        n.set_parameter_vector(w0)
        result_pbgn_no_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="PBGN without line search",
                                     verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_no_ls)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plot_name = "Comparing gradient descent vs generalised Newton"
    plot_name += ", %iD-%iD data" % (input_dim, output_dim)
    plot_name += ", %.2g s training time" % t_lim
    plot_name += ", %s hidden units" % str(num_hidden_units)
    plotting.plot_training_curves(results_list,
                                  plot_name,
                                  output_dir,
                                  e_lims=e_lims)
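
# Example invocation (the argument values are illustrative assumptions;
# anything satisfying the constraints in the docstring works):
if __name__ == "__main__":
    main(input_dim=1,
         output_dim=1,
         n_train=100,
         batch_size=50,
         t_lim=5.0,
         num_hidden_units=[10],
         e_lims=[0.0, 0.5],
         n_repeats=3)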