def test_gradient_descent_optimizer(self):
    """Run gradient descent on the quadratic test objective and check that it lands on the optimum.

    Three stages: (1) the advertised optimum has exactly zero gradient, (2) an optimizer
    started at the optimum does not move, (3) an optimizer started elsewhere reaches the
    optimum in coordinates, objective value, and gradient, within a tight relative tolerance.
    """
    objective = self.polynomial

    # Stage 1: the claimed optimum really is a stationary point.
    expected_optimum = objective.optimum_point
    objective.current_point = expected_optimum
    self.assert_vector_within_relative(
        objective.compute_grad_objective_function(),
        numpy.zeros(objective.dim),
        0.0,
    )

    # Stage 2: starting at the optimum, the optimizer must stay there exactly.
    optimizer = GradientDescentOptimizer(self.domain, objective, self.gd_parameters)
    optimizer.optimize()
    self.assert_vector_within_relative(optimizer.objective_function.current_point, expected_optimum, 0.0)

    # Stage 3: start from a point that is not the optimum and optimize.
    rel_tol = 2.0e-13
    optimizer.objective_function.current_point = numpy.full(objective.dim, 0.2)
    optimizer.optimize()
    found_point = optimizer.objective_function.current_point

    # Coordinates
    self.assert_vector_within_relative(found_point, expected_optimum, rel_tol)
    # Objective value
    self.assert_scalar_within_relative(objective.compute_objective_function(), objective.optimum_value, rel_tol)
    # Gradient
    self.assert_vector_within_relative(
        objective.compute_grad_objective_function(),
        numpy.zeros(objective.dim),
        rel_tol,
    )
def test_gradient_descent_optimizer_constrained(self):
    """Check that gradient descent finds the best point in a domain when the true optimum is outside it.

    The expected answer is the unconstrained optimum projected onto the domain
    (each coordinate clamped to its interval). The test then checks the optimized
    coordinates, that the objective value improved over the initial guess, and that
    the derivative vanishes in every coordinate whose unconstrained optimum is
    inside its interval.
    """
    # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain
    domain_bounds = [ClosedInterval(0.05, 0.32), ClosedInterval(0.05, 0.6), ClosedInterval(0.05, 0.32)]
    domain = TensorProductDomain(domain_bounds)
    gradient_descent_optimizer = GradientDescentOptimizer(domain, self.polynomial, self.gd_parameters)

    # Work out what the optimal point would be given the domain constraints
    # (i.e., project to the nearest point in the domain).
    # numpy.clip returns a NEW array: the objective's own optimum_point is never
    # modified, which matters if that attribute hands back a stored array.
    constrained_optimum_point = numpy.clip(
        self.polynomial.optimum_point,
        [bounds.min for bounds in domain_bounds],
        [bounds.max for bounds in domain_bounds],
    )

    tolerance = 2.0e-13
    initial_guess = numpy.full(self.polynomial.dim, 0.2)
    gradient_descent_optimizer.objective_function.current_point = initial_guess
    initial_value = gradient_descent_optimizer.objective_function.compute_objective_function()
    gradient_descent_optimizer.optimize()
    output = gradient_descent_optimizer.objective_function.current_point

    # Verify coordinates
    self.assert_vector_within_relative(output, constrained_optimum_point, tolerance)

    # Verify optimized value is better than initial guess
    final_value = self.polynomial.compute_objective_function()
    T.assert_gt(final_value, initial_value)

    # Verify derivative: only get 0 derivative if the coordinate lies inside domain boundaries
    gradient = self.polynomial.compute_grad_objective_function()
    for i, bounds in enumerate(domain_bounds):
        if bounds.is_inside(self.polynomial.optimum_point[i]):
            self.assert_scalar_within_relative(gradient[i], 0.0, tolerance)
def test_multistarted_gradient_descent_optimizer(self):
    """Check that multistarted GD can find the optimum in a 'very' large domain.

    The returned best point is compared against the known optimum; the objective
    value and gradient are then evaluated at that same returned point.
    """
    # Set a large domain: a single GD run is unlikely to reach the optimum
    domain_bounds = [ClosedInterval(-10.0, 10.0)] * self.dim
    domain = TensorProductDomain(domain_bounds)

    tolerance = 2.0e-10
    num_points = 10
    gradient_descent_optimizer = GradientDescentOptimizer(domain, self.polynomial, self.gd_parameters)
    multistart_optimizer = MultistartOptimizer(gradient_descent_optimizer, num_points)

    output, _ = multistart_optimizer.optimize()

    # Verify coordinates
    self.assert_vector_within_relative(output, self.polynomial.optimum_point, tolerance)

    # Evaluate the objective at the point that was actually returned. Nothing visible
    # here guarantees the multistart loop leaves the objective positioned at its best
    # result (it may be sitting at the last start tried), so set it explicitly.
    self.polynomial.current_point = output

    # Verify function value
    value = self.polynomial.compute_objective_function()
    self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

    # Verify derivative
    gradient = self.polynomial.compute_grad_objective_function()
    self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)
def test_multistarted_gradient_descent_optimizer_crippled_start(self):
    """Verify that multistart GD reports the best result among its starting points.

    GD is limited to a single step and a single restart, so each start barely moves.
    With random starts only, the reported best point must differ from the optimum in
    every coordinate; once the optimum itself is appended to the starts, the reported
    best point must equal it exactly.
    """
    # Same parameters as the fixture, except only 1 GD iteration is allowed.
    crippled_fields = dict(self.gd_parameters._asdict(), max_num_steps=1, max_num_restarts=1)
    crippled_optimizer = GradientDescentOptimizer(
        self.domain,
        self.polynomial,
        GradientDescentParameters(**crippled_fields),
    )

    num_points = 15
    starts = self.domain.generate_uniform_random_points_in_domain(num_points)
    multistart = MultistartOptimizer(crippled_optimizer, num_points)

    # This point set won't include the optimum so multistart GD won't find it.
    best_point, _ = multistart.optimize(random_starts=starts)
    for delta in (best_point - self.polynomial.optimum_point):
        assert delta != 0.0

    # This point set will include the optimum so multistart GD will find it.
    optimum_row = self.polynomial.optimum_point.reshape((1, self.polynomial.dim))
    starts_with_optimum = numpy.append(starts, optimum_row, axis=0)
    best_point, _ = multistart.optimize(random_starts=starts_with_optimum)
    for delta in (best_point - self.polynomial.optimum_point):
        assert delta == 0.0
def test_gradient_descent_optimizer_with_averaging(self):
    """Check that gradient descent can find the optimum of the quadratic test objective with averaging on.

    This test doesn't exercise the purpose of averaging (i.e., this objective isn't stochastic),
    but it does check that it at least runs.
    """
    # Average over the last 3/4 of the steps. Floor division keeps this an integer
    # step count; plain ``/`` yields a float on Python 3.
    num_steps_averaged = self.gd_parameters.max_num_steps * 3 // 4
    gd_parameters_averaging = GradientDescentParameters(
        self.gd_parameters.max_num_steps,
        self.gd_parameters.max_num_restarts,
        num_steps_averaged,
        self.gd_parameters.gamma,
        self.gd_parameters.pre_mult,
        self.gd_parameters.max_relative_change,
        self.gd_parameters.tolerance,
    )

    # Check that the claimed optimum is an optimum
    optimum_point = self.polynomial.optimum_point
    self.polynomial.current_point = optimum_point
    gradient = self.polynomial.compute_grad_objective_function()
    self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0)

    # Verify that gradient descent does not move from the optimum if we start it there.
    gradient_descent_optimizer = GradientDescentOptimizer(self.domain, self.polynomial, gd_parameters_averaging)
    gradient_descent_optimizer.optimize()
    output = gradient_descent_optimizer.objective_function.current_point
    self.assert_vector_within_relative(output, optimum_point, 0.0)

    # Start at a wrong point and check optimization
    tolerance = 2.0e-10
    initial_guess = numpy.full(self.polynomial.dim, 0.2)
    gradient_descent_optimizer.objective_function.current_point = initial_guess
    gradient_descent_optimizer.optimize()
    output = gradient_descent_optimizer.objective_function.current_point

    # Verify coordinates
    self.assert_vector_within_relative(output, optimum_point, tolerance)

    # Verify function value
    value = self.polynomial.compute_objective_function()
    self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

    # Verify derivative
    gradient = self.polynomial.compute_grad_objective_function()
    self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)
def test_multistart_monte_carlo_expected_improvement_optimization(self):
    """Multistart gradient descent on Monte Carlo 2-EI should move toward an optimal pair of points to sample.

    Convergence is deliberately loose (few steps, large tolerance) so the test stays
    fast. Besides the "small gradient" and "inside the domain" checks, it therefore
    also sanity-checks that EI did not get worse and that every gradient component
    shrank in magnitude relative to the random starting point.
    """
    numpy.random.seed(7858)  # TODO(271): Monte Carlo only works for this seed

    # First test environment with at least 20 sampled points.
    env_index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
    _, gaussian_process = self.gp_test_environments[env_index]

    # really large tolerance b/c converging with monte-carlo (esp in Python) is expensive
    tolerance = 3.0e-2
    gd_parameters = GradientDescentParameters(
        75,   # max_num_steps: this is *too few* steps; we configure it this way so the test will run quickly
        5,    # max_num_restarts
        50,   # num_steps_averaged
        0.2,  # gamma
        1.5,  # pre_mult
        1.0,  # max_relative_change
        tolerance,
    )
    num_multistarts = 2

    # Expand the domain so that we are definitely not doing constrained optimization
    num_to_sample = 2
    search_domain = RepeatedDomain(
        num_to_sample,
        TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim),
    )

    # Just any random point that won't be optimal
    start_points = search_domain.generate_random_point_in_domain()
    ei_eval = ExpectedImprovement(gaussian_process, start_points, num_mc_iterations=10000)

    # Compute EI and its gradient for the sake of comparison
    ei_before = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
    grad_before = ei_eval.compute_grad_expected_improvement()

    ei_optimizer = GradientDescentOptimizer(search_domain, ei_eval, gd_parameters)
    best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

    # Check that gradients are "small"
    ei_eval.current_point = best_point
    ei_after = ei_eval.compute_expected_improvement(force_monte_carlo=True)  # TODO(271) Monte Carlo only works for this seed
    grad_after = ei_eval.compute_grad_expected_improvement()
    self.assert_vector_within_relative(grad_after, numpy.zeros(grad_after.shape), tolerance)

    # Check that output is in the domain
    assert search_domain.check_point_inside(best_point) is True

    # Since we didn't really converge to the optimal EI (too costly), do some other sanity checks
    # EI should have improved
    assert ei_after >= ei_before

    # grad EI should have improved
    for position in numpy.ndindex(grad_after.shape):
        assert numpy.fabs(grad_after[position]) <= numpy.fabs(grad_before[position])
def test_get_averaging_range(self):
    """Check the (start, end) interval chosen for Polyak-Ruppert averaging.

    With 250 total steps, each requested number of averaged steps is paired with
    the start index the optimizer is expected to return; the expected end index
    is ``num_steps_total + 1`` in every case.
    """
    total_steps = 250
    expected_end = total_steps + 1

    # (num_steps_averaged requested, expected start of the averaging range)
    cases = (
        (-1, 1),
        (0, 250),
        (1, 250),
        (20, 231),
        (100, 151),
        (249, 2),
        (250, 1),
        (251, 1),
        (10000, 1),
    )
    for requested, expected_start in cases:
        actual_start, actual_end = GradientDescentOptimizer._get_averaging_range(requested, total_steps)
        T.assert_equal(actual_start, expected_start)
        T.assert_equal(actual_end, expected_end)
def test_gradient_descent_optimizer_constrained(self):
    """Check that gradient descent finds the best point in a domain when the true optimum is outside it.

    The expected result is the unconstrained optimum clamped, coordinate by
    coordinate, to the domain's intervals. Coordinates, improvement of the
    objective value over the initial guess, and the derivative in unclamped
    coordinates are all verified.
    """
    # Domain where the optimum, (0.5, 0.5, 0.5), lies outside the domain
    domain_bounds = [
        ClosedInterval(0.05, 0.32),
        ClosedInterval(0.05, 0.6),
        ClosedInterval(0.05, 0.32),
    ]
    domain = TensorProductDomain(domain_bounds)
    gradient_descent_optimizer = GradientDescentOptimizer(
        domain, self.polynomial, self.gd_parameters)

    # Work out what the optimal point would be given the domain constraints
    # (i.e., project to the nearest point in the domain). The projection is built
    # as a fresh array via numpy.clip so the objective's optimum_point is left
    # untouched; the derivative check below reads it again.
    lower_bounds = [bounds.min for bounds in domain_bounds]
    upper_bounds = [bounds.max for bounds in domain_bounds]
    constrained_optimum_point = numpy.clip(self.polynomial.optimum_point, lower_bounds, upper_bounds)

    tolerance = 2.0e-13
    initial_guess = numpy.full(self.polynomial.dim, 0.2)
    gradient_descent_optimizer.objective_function.current_point = initial_guess
    initial_value = gradient_descent_optimizer.objective_function.compute_objective_function()
    gradient_descent_optimizer.optimize()
    output = gradient_descent_optimizer.objective_function.current_point

    # Verify coordinates
    self.assert_vector_within_relative(output, constrained_optimum_point, tolerance)

    # Verify optimized value is better than initial guess
    final_value = self.polynomial.compute_objective_function()
    assert final_value >= initial_value

    # Verify derivative: only get 0 derivative if the coordinate lies inside domain boundaries
    gradient = self.polynomial.compute_grad_objective_function()
    for i, bounds in enumerate(domain_bounds):
        if bounds.is_inside(self.polynomial.optimum_point[i]):
            self.assert_scalar_within_relative(gradient[i], 0.0, tolerance)
def test_multistart_hyperparameter_optimization(self):
    """Check that multistart optimization (gradient descent) can find the optimum hyperparameters.

    The global numpy RNG is seeded for reproducibility. The RNG state found on
    entry is restored on exit, including when an assertion fails, so the fixed
    seed does not leak into whatever runs next.
    """
    random_state = numpy.random.get_state()
    try:
        numpy.random.seed(87612)

        max_num_steps = 200  # this is generally *too few* steps; we configure it this way so the test will run quickly
        max_num_restarts = 5
        num_steps_averaged = 0
        gamma = 0.2
        pre_mult = 1.0
        max_relative_change = 0.3
        tolerance = 1.0e-11
        gd_parameters = GradientDescentParameters(
            max_num_steps,
            max_num_restarts,
            num_steps_averaged,
            gamma,
            pre_mult,
            max_relative_change,
            tolerance,
        )
        num_multistarts = 3  # again, too few multistarts; but we want the test to run reasonably quickly

        num_sampled = 10
        self.gp_test_environment_input.num_sampled = num_sampled
        _, gaussian_process = self._build_gaussian_process_test_data(self.gp_test_environment_input)
        python_cov, historical_data = gaussian_process.get_core_data_copy()

        lml = GaussianProcessLogMarginalLikelihood(python_cov, historical_data)

        domain = TensorProductDomain(
            [ClosedInterval(1.0, 4.0)] * self.gp_test_environment_input.num_hyperparameters)

        hyperparameter_optimizer = GradientDescentOptimizer(domain, lml, gd_parameters)
        best_hyperparameters = multistart_hyperparameter_optimization(hyperparameter_optimizer, num_multistarts)

        # Check that gradients are small
        lml.hyperparameters = best_hyperparameters
        gradient = lml.compute_grad_log_likelihood()
        self.assert_vector_within_relative(gradient, numpy.zeros(self.num_hyperparameters), tolerance)

        # Check that output is in the domain
        assert domain.check_point_inside(best_hyperparameters) is True
    finally:
        # Always hand back the RNG state we were given, even on failure.
        numpy.random.set_state(random_state)
def test_gradient_descent_optimizer_with_averaging(self):
    """Test if Gradient Descent can optimize a simple objective function with averaging on.

    This test doesn't exercise the purpose of averaging (i.e., this objective isn't stochastic),
    but it does check that it at least runs.
    """
    # Average over the last 3/4 of the steps. Use floor division so the step count
    # stays an integer; ``/`` would produce a float on Python 3.
    num_steps_averaged = self.gd_parameters.max_num_steps * 3 // 4
    param_dict = self.gd_parameters._asdict()
    param_dict['num_steps_averaged'] = num_steps_averaged
    gd_parameters_averaging = GradientDescentParameters(**param_dict)
    gradient_descent_optimizer = GradientDescentOptimizer(
        self.domain, self.polynomial, gd_parameters_averaging)
    self.optimizer_test(gradient_descent_optimizer, tolerance=2.0e-10)
def test_get_averaging_range(self):
    """Exercise the helper that picks which interval Polyak-Ruppert averaging covers.

    For a run of 250 total steps, a table maps each requested averaged-step count
    to the expected start of the range. The expected end of the range is always
    ``num_steps_total + 1``.
    """
    num_steps_total = 250
    expected_end = num_steps_total + 1

    # requested num_steps_averaged -> expected range start
    expected_start_by_request = [
        (-1, 1),
        (0, 250),
        (1, 250),
        (20, 231),
        (100, 151),
        (249, 2),
        (250, 1),
        (251, 1),
        (10000, 1),
    ]
    for num_steps_averaged, expected_start in expected_start_by_request:
        result = GradientDescentOptimizer._get_averaging_range(num_steps_averaged, num_steps_total)
        range_start, range_end = result
        assert range_start == expected_start
        assert range_end == expected_end
def test_multistart_analytic_expected_improvement_optimization(self):
    """Multistart gradient descent on 1D analytic EI should find an optimal point to sample.

    After optimizing, the EI gradient at the returned point must be within
    tolerance of zero and the point must lie inside the (expanded) search domain.
    """
    numpy.random.seed(3148)

    # First test environment with at least 20 sampled points.
    env_index = numpy.argmax(numpy.greater_equal(self.num_sampled_list, 20))
    domain, gaussian_process = self.gp_test_environments[env_index]

    tolerance = 1.0e-7
    gd_parameters = GradientDescentParameters(
        200,  # max_num_steps: this is generally *too few* steps; we configure it this way so the test will run quickly
        5,    # max_num_restarts
        0,    # num_steps_averaged
        0.2,  # gamma
        1.5,  # pre_mult
        1.0,  # max_relative_change
        tolerance,
    )
    num_multistarts = 3

    start_point = domain.generate_random_point_in_domain()
    ei_eval = ExpectedImprovement(gaussian_process, start_point)

    # expand the domain so that we are definitely not doing constrained optimization
    search_domain = RepeatedDomain(
        ei_eval.num_to_sample,
        TensorProductDomain([ClosedInterval(-4.0, 2.0)] * self.dim),
    )
    num_to_sample = 1

    ei_optimizer = GradientDescentOptimizer(search_domain, ei_eval, gd_parameters)
    best_point = multistart_expected_improvement_optimization(ei_optimizer, num_multistarts, num_to_sample)

    # Check that gradients are small
    ei_eval.current_point = best_point
    grad_ei = ei_eval.compute_grad_expected_improvement()
    self.assert_vector_within_relative(grad_ei, numpy.zeros(grad_ei.shape), tolerance)

    # Check that output is in the domain
    assert search_domain.check_point_inside(best_point) is True
def test_gradient_descent_optimizer_with_averaging(self):
    """Check that gradient descent can find the optimum of the quadratic test objective with averaging on.

    This test doesn't exercise the purpose of averaging (i.e., this objective isn't stochastic),
    but it does check that it at least runs.
    """
    # 3/4 of the steps are averaged. ``//`` keeps the count integral on both
    # Python 2 and Python 3 (``/`` would give a float on Python 3).
    num_steps_averaged = self.gd_parameters.max_num_steps * 3 // 4
    gd_parameters_averaging = GradientDescentParameters(
        self.gd_parameters.max_num_steps,
        self.gd_parameters.max_num_restarts,
        num_steps_averaged,
        self.gd_parameters.gamma,
        self.gd_parameters.pre_mult,
        self.gd_parameters.max_relative_change,
        self.gd_parameters.tolerance,
    )

    # Check that the claimed optimum is an optimum
    optimum_point = self.polynomial.optimum_point
    self.polynomial.current_point = optimum_point
    gradient = self.polynomial.compute_grad_objective_function()
    self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), 0.0)

    # Verify that gradient descent does not move from the optimum if we start it there.
    gradient_descent_optimizer = GradientDescentOptimizer(
        self.domain, self.polynomial, gd_parameters_averaging)
    gradient_descent_optimizer.optimize()
    output = gradient_descent_optimizer.objective_function.current_point
    self.assert_vector_within_relative(output, optimum_point, 0.0)

    # Start at a wrong point and check optimization
    tolerance = 2.0e-10
    initial_guess = numpy.full(self.polynomial.dim, 0.2)
    gradient_descent_optimizer.objective_function.current_point = initial_guess
    gradient_descent_optimizer.optimize()
    output = gradient_descent_optimizer.objective_function.current_point

    # Verify coordinates
    self.assert_vector_within_relative(output, optimum_point, tolerance)

    # Verify function value
    value = self.polynomial.compute_objective_function()
    self.assert_scalar_within_relative(value, self.polynomial.optimum_value, tolerance)

    # Verify derivative
    gradient = self.polynomial.compute_grad_objective_function()
    self.assert_vector_within_relative(gradient, numpy.zeros(self.polynomial.dim), tolerance)
def test_multistarted_gradient_descent_optimizer_crippled_start(self):
    """Verify that multistart GD returns the best result produced by its GD runs.

    GD is restricted to one step and one restart. Random starts alone must not
    yield the optimum (every coordinate differs); adding the optimum to the set
    of starts must make the optimizer return it exactly.
    """
    base = self.gd_parameters
    # Only allow 1 GD iteration (first two arguments); everything else is inherited.
    crippled_parameters = GradientDescentParameters(
        1,
        1,
        base.num_steps_averaged,
        base.gamma,
        base.pre_mult,
        base.max_relative_change,
        base.tolerance,
    )
    crippled_optimizer = GradientDescentOptimizer(self.domain, self.polynomial, crippled_parameters)

    num_points = 15
    random_points = self.domain.generate_uniform_random_points_in_domain(num_points)
    multistart = MultistartOptimizer(crippled_optimizer, num_points)

    # This point set won't include the optimum so multistart GD won't find it.
    best_found, _ = multistart.optimize(random_starts=random_points)
    for offset in (best_found - self.polynomial.optimum_point):
        T.assert_not_equal(offset, 0.0)

    # This point set will include the optimum so multistart GD will find it.
    optimum_as_row = self.polynomial.optimum_point.reshape((1, self.polynomial.dim))
    points_plus_optimum = numpy.append(random_points, optimum_as_row, axis=0)
    best_found, _ = multistart.optimize(random_starts=points_plus_optimum)
    for offset in (best_found - self.polynomial.optimum_point):
        T.assert_equal(offset, 0.0)
def test_gradient_descent_optimizer(self):
    """Gradient descent must recover the optimum of the quadratic test objective.

    First confirms the objective's stated optimum has zero gradient and that the
    optimizer is a fixed point there (both with zero tolerance), then restarts
    from a non-optimal point and checks the result's coordinates, objective
    value, and gradient.
    """
    polynomial = self.polynomial
    known_optimum = polynomial.optimum_point

    # The claimed optimum must have an exactly zero gradient.
    polynomial.current_point = known_optimum
    grad_at_optimum = polynomial.compute_grad_objective_function()
    self.assert_vector_within_relative(grad_at_optimum, numpy.zeros(polynomial.dim), 0.0)

    # Gradient descent started at the optimum must not move away from it.
    gd = GradientDescentOptimizer(self.domain, polynomial, self.gd_parameters)
    gd.optimize()
    resting_point = gd.objective_function.current_point
    self.assert_vector_within_relative(resting_point, known_optimum, 0.0)

    # Now begin at a wrong point and optimize.
    tolerance = 2.0e-13
    gd.objective_function.current_point = numpy.full(polynomial.dim, 0.2)
    gd.optimize()
    optimized_point = gd.objective_function.current_point

    # Coordinates match the optimum.
    self.assert_vector_within_relative(optimized_point, known_optimum, tolerance)

    # Objective value matches the optimal value.
    optimized_value = polynomial.compute_objective_function()
    self.assert_scalar_within_relative(optimized_value, polynomial.optimum_value, tolerance)

    # Gradient is (numerically) zero.
    optimized_gradient = polynomial.compute_grad_objective_function()
    self.assert_vector_within_relative(optimized_gradient, numpy.zeros(polynomial.dim), tolerance)
def test_gradient_descent_multistarted_optimizer(self):
    """Run the shared multistart check with Gradient Descent on a "hard" objective.

    The optimizer is built over ``self.large_domain`` and handed to
    ``multistarted_optimizer_test``, which performs the actual verification.
    """
    optimizer = GradientDescentOptimizer(self.large_domain, self.polynomial, self.gd_parameters)
    self.multistarted_optimizer_test(optimizer)
def test_gradient_descent_optimizer(self):
    """Run the shared single-start check with Gradient Descent on a simple objective.

    The optimizer is built over ``self.domain`` and handed to ``optimizer_test``,
    which performs the actual verification.
    """
    optimizer = GradientDescentOptimizer(self.domain, self.polynomial, self.gd_parameters)
    self.optimizer_test(optimizer)