Example #1
def test_gradient_descent_line_search(seed):
    """
    Test gradient descent, using a line-search. A line-search should guarantee
    that each iteration reduces the error function, so this is tested using
    assert statements after calling the gradient_descent function.
    """
    # Set the random seed
    np.random.seed(seed)
    # Generate random number of iterations, network, data, and results file
    n_iters = np.random.randint(10, 20)
    n = get_random_network(input_dim=1, output_dim=1)
    sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)
    results_filename = (
        "Test gradient descent with line-search, seed = %i.txt" % seed)
    results_path = os.path.join(output_dir, results_filename)
    results_file = open(results_path, "w")
    result = optimisers.Result(name="SGD with line search",
                               verbose=True,
                               file=results_file)
    # Add step size column to result
    ls = optimisers.LineSearch(max_its=int(1e10))
    result.add_column(optimisers.results.columns.StepSize(ls))
    # Call gradient descent function
    result_ls = optimisers.gradient_descent(
        n,
        sin_data,
        terminator=optimisers.Terminator(i_lim=n_iters),
        evaluator=optimisers.Evaluator(i_interval=1),
        line_search=ls,
        result=result)
    # Make sure each iteration reduces the training error
    train_error_list = result.get_values(optimisers.results.columns.TrainError)
    for i in range(len(train_error_list) - 1):
        assert train_error_list[i + 1] < train_error_list[i]

    results_file.close()
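
The loop above checks for a strict decrease pair by pair; the same condition can be written as a single assertion over consecutive pairs (a minimal sketch in plain Python, independent of the optimisers API):

# Equivalent check: every recorded error is strictly below the previous one
assert all(
    later < earlier
    for earlier, later in zip(train_error_list, train_error_list[1:])
)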
Example #2
# Generate random network and data
n = models.NeuralNetwork(
    input_dim=1,
    output_dim=1,
    num_hidden_units=[10],
    act_funcs=[models.activations.cauchy, models.activations.identity])
sin_data = data.Sinusoidal(input_dim=1, output_dim=1, freq=1)

# Call gradient descent function
result = optimisers.gradient_descent(
    n,
    sin_data,
    terminator=optimisers.Terminator(t_lim=t_lim),
    evaluator=optimisers.Evaluator(t_interval=t_interval),
    result=optimisers.Result(name="SGD with line search", verbose=True),
    line_search=optimisers.LineSearch())

# Plot predictions
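# forward_prop appears to take inputs with shape (input_dim, n_points), so the
# 1D inputs are reshaped from (200,) to (1, 200) before prediction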
x_pred = np.linspace(-2, 2, 200).reshape(1, -1)
y_pred = n.forward_prop(x_pred)
plotting.plot_1D_regression("Gradient descent predictions for 1D sin data",
                            output_dir, sin_data, x_pred, y_pred)

# Plot learning curve
plotting.plot_training_curves(
    [result],
    "Gradient descent learning curves for 1D sin data",
    output_dir,
    e_lims=[0, 0.02])
Example #3
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
         e_lims, n_repeats):
    """
    Main function for this script, wrapped by argparse for command-line
    arguments.
    """

    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, time limit, and results list
    np.random.seed(9251)
    sin_data = data.Sinusoidal(input_dim=input_dim,
                               output_dim=output_dim,
                               n_train=n_train)
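    # Evaluate the model roughly 50 times over the allotted training time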
    t_interval = t_lim / 50
    results_list = []

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[models.activations.cauchy, models.activations.identity],
            initialiser=models.initialisers.ConstantPreActivationStatistics(
                sin_data.x_train, sin_data.y_train))
        # Set name for experiment
        name = "Constant pre-activation statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result)

        # Generate random network
        n = models.NeuralNetwork(
            input_dim=input_dim,
            output_dim=output_dim,
            num_hidden_units=num_hidden_units,
            act_funcs=[models.activations.cauchy, models.activations.identity],
            initialiser=models.initialisers.ConstantParameterStatistics())
        # Set name for experiment
        name = "Constant parameter statistics"
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plotting.plot_training_curves(
        results_list,
        "Comparing initialisers for gradient descent on 2D sinusoidal data",
        output_dir,
        e_lims=e_lims,
        tp=0.5)
Example #4
def test_predictions_column(
    input_dim,
    output_dim,
    store_hidden,
    store_preactivations,
):
    """ Test using a column which stores model predictions during training """
    # Set random seed and initialise network and dataset
    set_random_seed_from_args(
        "test_predictions_column",
        input_dim,
        output_dim,
        store_hidden,
    )
    n_train = np.random.randint(10, 20)
    n_pred = ceil(pow(np.random.randint(5, 10), 1 / input_dim))
    n_its = np.random.randint(10, 20)
    model = get_random_network(input_dim=input_dim, output_dim=output_dim)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    # Initialise output file and Result object
    test_name = "test_predictions_column, %id-%id data, store_hidden=%s" % (
        input_dim,
        output_dim,
        store_hidden,
    )
    output_filename = "%s.txt" % test_name
    with open(os.path.join(output_dir, output_filename), "w") as f:
        # Initialise result object
        result = optimisers.Result(name=test_name,
                                   file=f,
                                   add_default_columns=True)
        # Initialise column object and add to the result
        columns = optimisers.results.columns
        prediction_column = columns.Predictions(
            sin_data,
            n_points_per_dim=n_pred,
            store_hidden_layer_outputs=store_hidden,
            store_hidden_layer_preactivations=store_preactivations,
        )
        result.add_column(prediction_column)
        # Call optimisation function
        optimisers.gradient_descent(
            model,
            sin_data,
            result=result,
            terminator=optimisers.Terminator(i_lim=n_its),
            evaluator=optimisers.Evaluator(i_interval=1),
        )
        # Print Predictions column attributes to file
        print("\n\nx_pred:", prediction_column.x_pred, sep="\n", file=f)
        iter_list = result.get_values(columns.Iteration)
        print("\n\nPredictions:", file=f)
        for i in iter_list:
            print("i = %i:" % i, file=f)
            print(prediction_column.predictions_dict[i], file=f)
        if store_hidden:
            print("\n\nHidden layer outputs:", file=f)
            for i in iter_list:
                print(
                    "\ni = %i:" % i,
                    *prediction_column.hidden_outputs_dict[i],
                    file=f,
                    sep="\n\n",
                )

    # Test that the Prediction object attributes are as expected
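    # The predictions are evaluated on a grid with n_pred points per input
    # dimension, so the grid contains n_pred ** input_dim points in total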
    n_pred_grid = pow(n_pred, input_dim)
    assert prediction_column.x_pred.shape == (input_dim, n_pred_grid)

    iter_set = set(iter_list)
    assert set(prediction_column.predictions_dict.keys()) == iter_set
    for y_pred in prediction_column.predictions_dict.values():
        assert y_pred.shape == (output_dim, n_pred_grid)

    hidden_outputs_dict = prediction_column.hidden_outputs_dict
    if store_hidden:
        assert set(hidden_outputs_dict.keys()) == iter_set
        for hidden_output_list in hidden_outputs_dict.values():
            assert len(hidden_output_list) == len(model.layers) - 1
            for i, hidden_output in enumerate(hidden_output_list):
                expected_shape = (model.layers[i].output_dim, n_pred_grid)
                assert hidden_output.shape == expected_shape
    else:
        assert len(hidden_outputs_dict) == 0
Example #5
def main(args):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   args: object containing modified command line arguments as attributes
    """
    np.random.seed(args.seed)

    # Get output directory which is specific to the relevant script parameters
    param_str = " ".join([
        "d%s" % args.dataset_type.__name__[:3],
        "i%s" % args.input_dim,
        "o%s" % args.output_dim,
        "t%s" % args.t_lim,
        "n%s" % args.n_train,
        "b%s" % args.batch_size,
        "u%s" % args.num_hidden_units,
    ])
    if args.dynamic_terminator:
        dt_str = "dyn"
        if args.dt_smooth_output:
            dt_str += "sOut"
        if args.dt_smooth_mrse:
            dt_str += "sMrStd"
        param_str += " %s%i" % (dt_str, args.dt_buffer_length)

    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Train gradient descent",
        param_str,
    )
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Perform warmup experiment
    optimisers.warmup()

    # Initialise lists of objects that will be stored for each repeat
    result_list = []
    model_list = []
    prediction_column_list = []

    # Initialise dataset object and corresponding error function
    dataset_kwargs = {
        "input_dim": args.input_dim,
        "n_train": args.n_train,
    }
    if not issubclass(args.dataset_type, data.BinaryClassification):
        dataset_kwargs["output_dim"] = args.output_dim
    dataset = args.dataset_type(**dataset_kwargs)

    if isinstance(dataset, data.Regression):
        error_func = models.errors.sum_of_squares
        act_funcs = None
        print("Using regression data set with sum of squares error function")
    elif isinstance(dataset, data.BinaryClassification):
        error_func = models.errors.binary_cross_entropy
        act_funcs = [models.activations.gaussian, models.activations.logistic]
        print("Using binary classification data set with binary cross-entropy "
              "error function, and logistic activation function in the output "
              "layer")
    elif isinstance(dataset, data.Classification):
        error_func = models.errors.softmax_cross_entropy
        act_funcs = None
        print("Using classification data set with softmax cross entropy error "
              "function")
    else:
        raise ValueError(
            "Data set must be either a binary-classification, multi-class "
            "classification or regression data set")

    # Iterate through repeats
    for i in range(args.n_repeats):
        # Initialise model and Result object
        model = models.NeuralNetwork(
            input_dim=args.input_dim,
            output_dim=args.output_dim,
            num_hidden_units=args.num_hidden_units,
            error_func=error_func,
            act_funcs=act_funcs,
        )

        result = optimisers.Result(name="Repeat %i" % (i + 1))

        if args.line_search is not None:
            args.line_search_col = columns.StepSize(args.line_search)
            result.add_column(args.line_search_col)

        if args.plot_pred_gif or args.plot_hidden_gif:
            pred_column = columns.Predictions(
                dataset=dataset,
                store_hidden_layer_outputs=args.plot_hidden_gif,
                store_hidden_layer_preactivations=(
                    args.plot_hidden_preactivations_gif),
            )
            result.add_column(pred_column)

        if args.plot_test_set_improvement_probability:
            test_set_improvement_column = (
                columns.TestSetImprovementProbabilitySimple(
                    model,
                    dataset,
                    smoother=optimisers.smooth.MovingAverage(1, n=10),
                ))
            result.add_column(test_set_improvement_column)

        if args.dynamic_terminator:
            dynamic_terminator = optimisers.DynamicTerminator(
                model=model,
                dataset=dataset,
                batch_size=args.batch_size,
                replace=False,
                t_lim=args.t_lim,
                smooth_n=args.dt_buffer_length,
                smooth_x0=args.dt_x0,
                smooth_output=args.dt_smooth_output,
                smooth_mean_reduction=args.dt_smooth_mrse,
                smooth_std=args.dt_smooth_mrse,
            )
            terminator = dynamic_terminator
            batch_getter = dynamic_terminator

            dynamic_terminator_column = columns.BatchImprovementProbability(
                dynamic_terminator,
            )
            result.add_column(dynamic_terminator_column)
        else:
            terminator = optimisers.Terminator(t_lim=args.t_lim)
            batch_getter = optimisers.batch.ConstantBatchSize(
                args.batch_size,
                True,
            )

        # Perform gradient descent
        optimisers.gradient_descent(
            model,
            dataset,
            line_search=args.line_search,
            result=result,
            evaluator=optimisers.Evaluator(t_interval=args.t_eval),
            terminator=terminator,
            batch_getter=batch_getter,
        )

        # Store results
        result_list.append(result)
        model_list.append(model)
        if args.plot_pred_gif or args.plot_hidden_gif:
            prediction_column_list.append(pred_column)

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    os.system("explorer \"%s\"" % output_dir)
    print("Plotting training curves...")
    plotting.plot_training_curves(
        result_list,
        dir_name=output_dir,
        e_lims=args.error_lims,
    )
    if args.plot_test_set_improvement_probability or args.dynamic_terminator:
        attribute_list = [
            columns.TrainError,
            columns.TestError,
            columns.StepSize,
        ]
        if args.plot_test_set_improvement_probability:
            print("Plotting test set improvement probability...")
            attribute_list.append(columns.TestSetImprovementProbabilitySimple)
        if args.dynamic_terminator:
            print("Plotting batch improvement probability...")
            attribute_list.append(columns.BatchImprovementProbability)
        plotting.plot_result_attributes_subplots(
            plot_name="Improvement probability\n%s" % param_str,
            dir_name=output_dir,
            result_list=result_list,
            attribute_list=attribute_list,
            log_axes_attributes=[columns.StepSize],
            iqr_axis_scaling=True,
        )

    for i, model in enumerate(model_list):
        output_dir_repeat = os.path.join(output_dir, "Repeat %i" % (i + 1))
        if args.plot_preds:
            print("Plotting final predictions...")
            plotting.plot_data_predictions(
                plot_name="Final predictions",
                dir_name=output_dir_repeat,
                dataset=dataset,
                output_dim=args.output_dim,
                model=model,
            )
        if args.plot_pred_gif:
            print("Plotting gif of predictions during training...")
            plotting.plot_predictions_gif(
                plot_name="Model predictions during training",
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
        if args.plot_hidden_gif:
            print("Plotting gif of hidden layers during training...")
            if args.plot_hidden_preactivations_gif:
                plot_name = "Hidden layer preactivations during training"
            else:
                plot_name = "Hidden layer outputs during training"

            plotting.plot_hidden_outputs_gif(
                plot_name=plot_name,
                dir_name=output_dir_repeat,
                result=result_list[i],
                prediction_column=prediction_column_list[i],
                dataset=dataset,
                output_dim=args.output_dim,
                duration=args.t_eval * 1000,
            )
Example #6
        num_hidden_units=[20, 20],
        act_funcs=[models.activations.cauchy, models.activations.identity])
    w0 = n.get_parameter_vector().copy()
    # Iterate through constant size batch-getters
    for batch_size in batch_size_list:
        # Set name for experiment
        name = "Batch size = {:04d}".format(int(batch_size))
        # Reset parameter vector
        n.set_parameter_vector(w0)
        # Call gradient descent function
        result = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name=name, verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(int(batch_size)))
        results_list.append(result)

    # Try again with full training set
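    # Reset to the same initial parameters so the full-batch run is directly
    # comparable with the constant-batch-size runs above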
    n.set_parameter_vector(w0)
    result = optimisers.gradient_descent(
        n,
        sin_data,
        terminator=optimisers.Terminator(t_lim=t_lim),
        evaluator=optimisers.Evaluator(t_interval=t_interval),
        line_search=optimisers.LineSearch(),
        batch_getter=optimisers.batch.FullTrainingSet(),
        result=optimisers.Result(name="Full training set", verbose=True),
    )
Example #7
def main(
    input_dim,
    output_dim,
    n_train,
    num_hidden_units,
    n_repeats,
    n_iters,
    n_plots,
    n_batch_sizes,
    min_batch_size,
    ylims,
    seed,
    batch_size_optimise,
    use_replacement,
    gif_duration
):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   n_repeats: positive integer number of repeats to perform of each batch
        size test
    -   n_iters: total number of iterations to perform
    -   n_plots: number of frames of the gif (equal to how many times
        optimisation will pause in order to sweep over the list of batch sizes)
    -   n_batch_sizes: the number of different batch sizes to test for each
        iteration
    -   min_batch_size: the smallest batch size to test
    -   ylims: limits for the y-axes of each subplot of the output gif. Should
        be None, in which case the axis limits are calculated automatically, or
        an iterable containing 4 floats, of which the first 2 are the lower and
        upper axis limits for the left subplot, and the last 2 are the lower
        and upper axis limits for the right subplot
    -   seed: random seed to use for the experiment
    -   batch_size_optimise: batch size to use for standard optimisation
        iterations (IE not when sweeping over batch sizes). If omitted, then
        the full training set is used as a batch during optimisation iterations
    -   use_replacement: if True, then use replacement when sampling batches
        from the training set
    -   gif_duration: time in seconds that the output gif should last for in
        total
    """
    np.random.seed(seed)
    n_iters_per_plot = int(n_iters / n_plots)

    # Initialise model and dataset
    model = models.NeuralNetwork(input_dim, output_dim, num_hidden_units)
    freq = 1 if (input_dim == 1) else None
    sin_data = data.Sinusoidal(input_dim, output_dim, n_train, freq=freq)
    
    # Initialise objects for optimisation
    result = optimisers.Result()
    evaluator = optimisers.Evaluator(i_interval=n_iters_per_plot)
    terminator = optimisers.Terminator(i_lim=n_iters)
    if batch_size_optimise is None:
        batch_getter = optimisers.batch.FullTrainingSet()
    else:
        batch_getter = optimisers.batch.ConstantBatchSize(
            batch_size_optimise,
            use_replacement
        )
    line_search = optimisers.LineSearch()

    # Initialise OptimalBatchSize column and add to the result object
    gd_optimiser = optimisers.GradientDescent(line_search)
    optimal_batch_size_col = optimisers.results.columns.OptimalBatchSize(
        gd_optimiser,
        sin_data.n_train,
        n_repeats=n_repeats,
        n_batch_sizes=n_batch_sizes
    )
    result.add_column(optimal_batch_size_col)

    # Get output directory which is specific to the script parameters
    param_str = ", ".join([
        "input_dim = %i"            % input_dim,
        "output_dim = %i"           % output_dim,
        "n_train = %i"              % n_train,
        "n_iters = %i"              % n_iters,
        "batch_size_optimise = %r"  % batch_size_optimise,
        "use_replacement = %r"      % use_replacement,
        "ylims = %r"                % ylims,
        "n_plots = %i"              % n_plots,
    ])
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(
        current_dir,
        "Outputs",
        "Error vs batch",
        param_str
    )

    # Call optimisation function
    gd_optimiser.optimise(
        model,
        sin_data,
        result=result,
        batch_getter=batch_getter,
        terminator=terminator,
        evaluator=evaluator,
    )

    # Make output plots
    print("Plotting output plots in \"%s\"..." % output_dir)
    frame_duration_ms = 1000 * gif_duration / n_plots
    if ylims is None:
        y_lim_left = None
        y_lim_right = None
    else:
        y_lim_left = ylims[:2]
        y_lim_right = ylims[2:]
    plotting.plot_error_reductions_vs_batch_size_gif(
        result,
        optimal_batch_size_col,
        output_dir,
        y_lim_left=y_lim_left,
        y_lim_right=y_lim_right,
        duration=frame_duration_ms,
        loop=None
    )
    plotting.plot_optimal_batch_sizes(
        "Optimal batch size",
        output_dir,
        result,
        optimal_batch_size_col,
    )
Example #8
def main(input_dim, output_dim, n_train, batch_size, t_lim, num_hidden_units,
         e_lims, n_repeats):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   batch_size: positive integer batch size to use for training
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network and store initial parameters
        n = models.NeuralNetwork(input_dim=input_dim,
                                 output_dim=output_dim,
                                 num_hidden_units=num_hidden_units,
                                 act_funcs=[
                                     models.activations.gaussian,
                                     models.activations.identity
                                 ])
        w0 = n.get_parameter_vector().copy()
        # Call gradient descent function
        result_gd_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="SGD with line search",
                                     verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_ls)
        # Try again without line search
        n.set_parameter_vector(w0)
        result_gd_no_ls = optimisers.gradient_descent(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="SGD without line search",
                                     verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_gd_no_ls)
        # Call generalised Newton function
        n.set_parameter_vector(w0)
        result_pbgn_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="PBGN with line search",
                                     verbose=True),
            line_search=optimisers.LineSearch(),
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_ls)
        # Try again without line search
        n.set_parameter_vector(w0)
        result_pbgn_no_ls = optimisers.generalised_newton(
            n,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=optimisers.Result(name="PBGN without line search",
                                     verbose=True),
            line_search=None,
            batch_getter=optimisers.batch.ConstantBatchSize(batch_size))
        results_list.append(result_pbgn_no_ls)

    # Get name of output directory
    current_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(current_dir, "Outputs")

    # Compare training curves
    plot_name = "Comparing gradient descent vs generalised Newton"
    plot_name += ", %iD-%iD data" % (input_dim, output_dim)
    plot_name += ", %.2g s training time" % t_lim
    plot_name += ", %s hidden units" % str(num_hidden_units)
    plotting.plot_training_curves(results_list,
                                  plot_name,
                                  output_dir,
                                  e_lims=e_lims)
Example #9
def main(input_dim, output_dim, n_train, t_lim, num_hidden_units, e_lims,
         n_repeats, alpha_smooth, p_c, min_batch_size):
    """
    Main function for the script. See module docstring for more info.

    Inputs:
    -   input_dim: positive integer number of input dimensions
    -   output_dim: positive integer number of output dimensions
    -   n_train: positive integer number of points in the training set
    -   t_lim: positive float, length of time to train for each experiment
    -   num_hidden_units: list of positive integers, number of hidden units in
        each hidden layer of the NeuralNetwork, EG [10] or [20, 20]
    -   e_lims: list of 2 floats, used as axis limits in the output plots
    -   n_repeats: positive integer number of repeats to perform of each
        experiment
    -   alpha_smooth: float in (0, 1), amount of smoothing to apply to the DBS
        batch size
    -   p_c: probability threshold passed to the dynamic batch-size getter as
        its prob_correct_direction argument
    -   min_batch_size: smallest batch size that the dynamic batch-size getter
        is allowed to choose
    """
    # Perform warmup experiment so process acquires priority
    optimisers.warmup()

    # Initialise data, results list, and time interval for evaluations
    np.random.seed(9251)
    sin_data = data.Sinusoidal(
        input_dim=input_dim,
        output_dim=output_dim,
        n_train=n_train,
    )
    results_list = []
    t_interval = t_lim / 50

    for i in range(n_repeats):
        # Set the random seed
        np.random.seed(i)
        # Generate random network
        model = models.NeuralNetwork(input_dim=input_dim,
                                     output_dim=output_dim,
                                     num_hidden_units=num_hidden_units,
                                     act_funcs=[
                                         models.activations.gaussian,
                                         models.activations.identity
                                     ])
        # Call gradient descent function
        result = optimisers.Result("Repeat = %i" % i)
        batch_getter = optimisers.batch.DynamicBatchSize(
            model,
            sin_data,
            alpha_smooth=alpha_smooth,
            prob_correct_direction=p_c,
            min_batch_size=min_batch_size)
        batch_col = optimisers.results.columns.BatchSize(batch_getter)
        dbs_col = optimisers.results.columns.DbsMetric()
        result.add_column(batch_col)
        result.add_column(dbs_col)
        result = optimisers.gradient_descent(
            model,
            sin_data,
            terminator=optimisers.Terminator(t_lim=t_lim),
            evaluator=optimisers.Evaluator(t_interval=t_interval),
            result=result,
            line_search=optimisers.LineSearch(),
            batch_getter=batch_getter)
        results_list.append(result)

    # Compare training curves
    plot_name_suffix = "\n%iD-%iD data" % (input_dim, output_dim)
    plot_name_suffix += ", %.2g s training time" % t_lim
    plot_name_suffix += ", %s hidden units" % str(num_hidden_units)
    plot_name_suffix += "\nalpha_smooth = %.3f" % alpha_smooth
    plot_name_suffix += ", p_c = %.3f" % p_c
    plot_name_suffix += ", min_batch_size = %.3f" % min_batch_size
    this_test_output_dir = os.path.join(output_dir,
                                        plot_name_suffix.replace("\n", ""))
    plotting.plot_training_curves(results_list,
                                  "DBS learning curves" + plot_name_suffix,
                                  this_test_output_dir,
                                  e_lims=e_lims)
    for col in [dbs_col, batch_col]:
        plot_name = "%s against iteration for dynamic batch size" % col.name
        plot_name += plot_name_suffix
        plotting.plot_result_attribute(plot_name, this_test_output_dir,
                                       results_list, type(col))
Example #10
    input_dim=input_dim,
    output_dim=output_dim,
    x_lo=x_lo,
    x_hi=x_hi
)
model = models.NeuralNetwork(
    input_dim=input_dim,
    output_dim=output_dim,
    num_hidden_units=[20, 20],
    act_funcs=[models.activations.cauchy, models.activations.identity]
)

# Create result object and add columns for iteration and DBS
result = optimisers.Result(
    name="SGD with line search",
    verbose=True,
    add_default_columns=False
)
i_column    = optimisers.results.columns.Iteration()
dbs_column  = optimisers.results.columns.DbsMetric()
result.add_column(i_column)
result.add_column(dbs_column)

# Call gradient descent function
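# Computing the gradient once up front presumably initialises the model's
# gradient state before the DbsMetric column is first evaluated (assumption
# based on context; the original script does not explain this call)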
model.get_gradient_vector(sin_data.x_train, sin_data.y_train)
result = optimisers.gradient_descent(
    model,
    sin_data,
    terminator=optimisers.Terminator(i_lim=i_lim),
    evaluator=optimisers.Evaluator(i_interval=i_interval),
    result=result,