def test_construct_feature_dataframe_no_context(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    n_samples = 100
    parameter_df = optimization_problem.parameter_space.random_dataframe(n_samples)
    feature_df = optimization_problem.construct_feature_dataframe(parameters_df=parameter_df)

    # The feature dataframe contains one column per parameter dimension, plus the "contains_context" flag column.
    assert feature_df.shape == (n_samples, len(optimization_problem.parameter_space.dimension_names) + 1)
    expected_columns = sorted([
        f"three_level_quadratic_config.{n}"
        for n in optimization_problem.parameter_space.dimension_names
    ])
    assert (feature_df.columns[:-1].sort_values() == expected_columns).all()
    assert feature_df.columns[-1] == "contains_context"
    assert not feature_df.contains_context.any()
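# A minimal, self-contained sketch (plain pandas; it mirrors, but does not call, the
# construct_feature_dataframe API above) of the shape the test expects: the feature frame is
# the parameter frame with prefixed column names plus a constant "contains_context" flag
# column. The column prefix and toy values here are illustrative assumptions.
import pandas as pd

params_df = pd.DataFrame({'x0': [0.1, 0.2], 'x1': [1.0, 2.0]})
features_df = params_df.add_prefix("three_level_quadratic_config.")
features_df["contains_context"] = False

assert features_df.shape == (2, len(params_df.columns) + 1)
assert features_df.columns[-1] == "contains_context"
assert not features_df.contains_context.any()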
def test_hierarchical_quadratic_cold_start(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    num_restarts = 2
    for restart_num in range(num_restarts):
        optimizer_config = bayesian_optimizer_config_store.default
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
        optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
        decision_tree_config = optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config
        decision_tree_config.splitter = "best"
        decision_tree_config.min_samples_to_fit = 10
        decision_tree_config.n_new_samples_before_refit = 2

        local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config
        )
        remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config
        )

        for bayesian_optimizer in [local_optimizer, remote_optimizer]:
            num_guided_samples = 50
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                print(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

                input_values_df = pd.DataFrame({
                    param_name: [param_value]
                    for param_name, param_value in suggested_params
                })
                target_values_df = y.to_dataframe()
                bayesian_optimizer.register(
                    feature_values_pandas_frame=input_values_df,
                    target_values_pandas_frame=target_values_df
                )

            best_config_point, best_objective = bayesian_optimizer.optimum(optimum_definition=OptimumDefinition.BEST_OBSERVATION)
            print(f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
            self.validate_optima(optimizer=bayesian_optimizer)
def __init__(
    self,
    optimizer_evaluator_config: Point,
    optimizer: OptimizerBase = None,
    optimizer_config: Point = None,
    objective_function: ObjectiveFunctionBase = None,
    objective_function_config: Point = None
):
    assert optimizer_evaluator_config in optimizer_evaluator_config_store.parameter_space
    assert (optimizer is None) != (optimizer_config is None), \
        "A valid optimizer XOR a valid optimizer_config must be supplied."
    assert (objective_function is None) != (objective_function_config is None), \
        "A valid objective_function XOR a valid objective_function_config must be specified."

    self.optimizer_evaluator_config = optimizer_evaluator_config
    self.objective_function_config = None
    self.objective_function = None
    self.optimizer_config = None
    self.optimizer = None

    # Let's get the objective function assigned to self's fields.
    #
    if (objective_function_config is not None) and (objective_function is None):
        assert objective_function_config in objective_function_config_store.parameter_space
        self.objective_function_config = objective_function_config
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
    elif (objective_function is not None) and (objective_function_config is None):
        self.objective_function_config = objective_function.objective_function_config
        self.objective_function = objective_function
    else:
        # The assert above should have caught this, but just in case someone removes or changes it.
        #
        assert False, "A valid objective_function XOR a valid objective_function_config must be specified."

    # Let's get the optimizer and its config assigned to self's fields.
    #
    if (optimizer_config is not None) and (optimizer is None):
        assert optimizer_config in bayesian_optimizer_config_store.parameter_space
        optimization_problem = self.objective_function.default_optimization_problem
        self.optimizer_config = optimizer_config
        self.optimizer = BayesianOptimizerFactory().create_local_optimizer(
            optimizer_config=optimizer_config,
            optimization_problem=optimization_problem
        )
    elif (optimizer is not None) and (optimizer_config is None):
        # TODO: assert that the optimization problem in the optimizer matches the objective function.
        # But this requires Hypergrid.__eq__.
        #
        self.optimizer_config = optimizer.optimizer_config
        self.optimizer = optimizer
    else:
        # Again, the assert at the beginning of the constructor should have caught this. But more asserts => fewer bugs.
        #
        assert False, "A valid optimizer XOR a valid optimizer_config must be supplied."
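# A self-contained illustration (toy function, no dependency on the classes above) of the
# `(a is None) != (b is None)` XOR guard used twice in the constructor above. The helper
# name is hypothetical; pytest is already used elsewhere in these tests.
import pytest

def pick_exactly_one(instance=None, config=None):
    # Exactly one of the two arguments must be supplied.
    assert (instance is None) != (config is None), "Supply exactly one of instance/config."
    return instance if instance is not None else config

assert pick_exactly_one(config={'n_estimators': 10}) == {'n_estimators': 10}
with pytest.raises(AssertionError):
    pick_exactly_one()  # neither supplied: the XOR guard trips
with pytest.raises(AssertionError):
    pick_exactly_one(instance=object(), config={})  # both supplied also trips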
def test_lasso_hierarchical_categorical_predictions(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    rerf = RegressionEnhancedRandomForestRegressionModel(
        model_config=self.model_config,
        input_space=objective_function.parameter_space,
        output_space=objective_function.output_space
    )

    # Fit the model with the same degree as the true y.
    num_train_x = 100
    x_train_df = objective_function.parameter_space.random_dataframe(num_samples=num_train_x)
    y_train_df = objective_function.evaluate_dataframe(x_train_df)
    rerf.fit(x_train_df, y_train_df)

    num_detected_features = len(rerf.detected_feature_indices_)
    self.assertTrue(
        rerf.root_model_gradient_coef_.shape == rerf.polynomial_features_powers_.shape,
        'Gradient coefficient shape is incorrect')
    self.assertTrue(
        rerf.fit_X_.shape == (num_train_x, rerf.polynomial_features_powers_.shape[0]),
        'Design matrix shape is incorrect')
    self.assertTrue(
        rerf.partial_hat_matrix_.shape == (num_detected_features, num_detected_features),
        'Hat matrix shape is incorrect')
    self.assertTrue(
        rerf.polynomial_features_powers_.shape == (28, 8),
        'PolynomialFeatures.powers_ shape is incorrect')

    # Test predictions, generating a single X feature at a time so that each prediction is made independently.
    predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
    num_test_x = 10
    y_test_list = []
    predicted_y_list = []
    for _ in range(num_test_x):
        x_test_df = objective_function.parameter_space.random_dataframe(num_samples=1)
        y_test_df = objective_function.evaluate_dataframe(x_test_df)
        y_test_list.append(y_test_df['y'].values[0])

        predictions = rerf.predict(x_test_df)
        pred_df = predictions.get_dataframe()
        predicted_y_list.append(pred_df[predicted_value_col].values[0])

    predicted_y = np.array(predicted_y_list)
    y_test = np.array(y_test_list)
    residual_sum_of_squares = ((y_test - predicted_y) ** 2).sum()
    total_sum_of_squares = ((y_test - y_test.mean()) ** 2).sum()
    unexplained_variance = residual_sum_of_squares / total_sum_of_squares
    self.assertTrue(unexplained_variance < 10 ** -4, '1 - R^2 larger than expected')
def test_lasso_hierarchical_categorical_predictions(self):
    random.seed(11001)
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    rerf = RegressionEnhancedRandomForestRegressionModel(
        model_config=self.model_config,
        input_space=objective_function.parameter_space,
        output_space=objective_function.output_space
    )

    # Fit the model with the same degree as the true y.
    # The input space consists of three 2-d domains of 200 x 200 units each, so random samples below a certain
    # size produce too few points to train reliable models.
    # TODO: Good place to use a non-random training set design.
    num_train_x = 600
    x_train_df = objective_function.parameter_space.random_dataframe(num_samples=num_train_x)
    y_train_df = objective_function.evaluate_dataframe(x_train_df)
    rerf.fit(x_train_df, y_train_df)

    num_detected_features = len(rerf.detected_feature_indices_)
    self.assertTrue(
        rerf.root_model_gradient_coef_.shape == rerf.polynomial_features_powers_.shape,
        'Gradient coefficient shape is incorrect')
    self.assertTrue(
        rerf.fit_X_.shape == (num_train_x, rerf.polynomial_features_powers_.shape[0]),
        'Design matrix shape is incorrect')
    self.assertTrue(
        rerf.partial_hat_matrix_.shape == (num_detected_features, num_detected_features),
        'Hat matrix shape is incorrect')
    self.assertTrue(
        rerf.polynomial_features_powers_.shape == (34, 9),
        'PolynomialFeatures.powers_ shape is incorrect')

    # Test predictions.
    predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
    num_test_x = 50
    x_test_df = objective_function.parameter_space.random_dataframe(num_samples=num_test_x)
    predictions = rerf.predict(x_test_df)
    pred_df = predictions.get_dataframe()
    predicted_y = pred_df[predicted_value_col].to_numpy()

    y_test = objective_function.evaluate_dataframe(x_test_df).to_numpy().reshape(-1)
    residual_sum_of_squares = ((y_test - predicted_y) ** 2).sum()
    total_sum_of_squares = ((y_test - y_test.mean()) ** 2).sum()
    unexplained_variance = residual_sum_of_squares / total_sum_of_squares
    test_threshold = 10 ** -3
    self.assertTrue(
        unexplained_variance < test_threshold,
        f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})')
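# A quick self-contained check (plain numpy + sklearn, toy data) that the "unexplained
# variance" computed above is exactly 1 - R^2 as reported by sklearn:
import numpy as np
from sklearn.metrics import r2_score

y_true = np.array([1.0, 2.0, 3.0, 4.0])
y_pred = np.array([1.1, 1.9, 3.2, 3.8])
rss = ((y_true - y_pred) ** 2).sum()         # residual sum of squares
tss = ((y_true - y_true.mean()) ** 2).sum()  # total sum of squares
unexplained_variance = rss / tss
assert abs(unexplained_variance - (1 - r2_score(y_true, y_pred))) < 1e-12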
def setup_class(cls):
    """Sets up all the objects needed to test the RandomSearchOptimizer.

    To test the RandomSearchOptimizer we need to first construct:
        * an optimization problem
        * a utility function

    To construct a utility function we need the same setup as in the TestConfidenceBoundUtilityFunction test.
    """
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    cls.input_space = objective_function.parameter_space
    cls.output_space = objective_function.output_space

    cls.input_values_dataframe = objective_function.parameter_space.random_dataframe(num_samples=2500)
    cls.output_values_dataframe = objective_function.evaluate_dataframe(cls.input_values_dataframe)

    cls.model_config = homogeneous_random_forest_config_store.default
    print(cls.model_config)

    cls.model = MultiObjectiveHomogeneousRandomForest(
        model_config=cls.model_config,
        input_space=cls.input_space,
        output_space=cls.output_space
    )
    cls.model.fit(
        cls.input_values_dataframe,
        cls.output_values_dataframe,
        iteration_number=len(cls.input_values_dataframe.index)
    )

    cls.utility_function_config = Point(
        utility_function_name="upper_confidence_bound_on_improvement",
        alpha=0.05
    )
    cls.optimization_problem = OptimizationProblem(
        parameter_space=cls.input_space,
        objective_space=cls.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    cls.utility_function = ConfidenceBoundUtilityFunction(
        function_config=cls.utility_function_config,
        surrogate_model=cls.model,
        minimize=cls.optimization_problem.objectives[0].minimize
    )
def test_default_config(self, objective_function_config_name):
    objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    lasso_model_config = lasso_cross_validated_config_store.default
    multi_objective_rf = MultiObjectiveLassoCrossValidated(
        model_config=lasso_model_config,
        input_space=objective_function.parameter_space,
        output_space=objective_function.output_space,
        logger=self.logger
    )

    if objective_function_config_name == '2d_hypersphere_minimize_some':
        num_training_samples = 25
        num_testing_samples = 10
    elif objective_function_config_name == '10d_hypersphere_minimize_some':
        num_training_samples = 50
        num_testing_samples = 10
    elif objective_function_config_name == '5_mutually_exclusive_polynomials':
        num_training_samples = 100
        num_testing_samples = 50
    else:
        assert False, f"Unknown objective_function_config_name: {objective_function_config_name}"

    train_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_training_samples)
    train_objectives_df = objective_function.evaluate_dataframe(train_params_df)

    test_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_testing_samples)
    test_objectives_df = objective_function.evaluate_dataframe(test_params_df)

    multi_objective_rf.fit(features_df=train_params_df, targets_df=train_objectives_df, iteration_number=num_training_samples)
    multi_objective_predictions = multi_objective_rf.predict(features_df=train_params_df, include_only_valid_rows=True)

    # TRAINING DATA
    #
    print("------------------------------------------------------------------------------------")
    print("--------------------------------------- TRAIN --------------------------------------")
    print("------------------------------------------------------------------------------------")
    training_gof = multi_objective_rf.compute_goodness_of_fit(
        features_df=train_params_df,
        targets_df=train_objectives_df,
        data_set_type=DataSetType.TRAIN
    )
    for objective_name in objective_function.output_space.dimension_names:
        print("------------------------------------------------------------------------------------")
        print(objective_name)
        print(training_gof[objective_name].to_json(indent=2))

    # TESTING DATA
    #
    print("------------------------------------------------------------------------------------")
    print("--------------------------------------- TEST ---------------------------------------")
    print("------------------------------------------------------------------------------------")
    testing_gof = multi_objective_rf.compute_goodness_of_fit(
        features_df=test_params_df,
        targets_df=test_objectives_df,
        data_set_type=DataSetType.TEST_KNOWN_RANDOM
    )
    for objective_name in objective_function.output_space.dimension_names:
        print("------------------------------------------------------------------------------------")
        print(objective_name)
        print(testing_gof[objective_name].to_json(indent=2))
def test_lasso_hierarchical_categorical_predictions(self):
    random.seed(11001)
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    polynomial_features_adapter = ContinuousToPolynomialBasisHypergridAdapter(
        adaptee=objective_function.parameter_space,
        degree=2,
        include_bias=True,
        interaction_only=False
    )
    lasso_cross_validated_model = LassoCrossValidatedRegressionModel(
        model_config=self.model_config,
        input_space=polynomial_features_adapter,
        output_space=objective_function.output_space
    )
    # Since the model's input_space stacks the polynomial basis functions onto the original input space,
    # we can skip validating input features on predict.
    lasso_cross_validated_model.skip_input_filtering_on_predict = True

    # Fit the model with the same degree as the true y.
    # The input space consists of three 2-d domains of 200 x 200 units each, so random samples below a certain
    # size produce too few points to train reliable models.
    # TODO: Good place to use a non-random training set design.
    num_train_x = 300
    x_train_df = objective_function.parameter_space.random_dataframe(num_samples=num_train_x)
    y_train_df = objective_function.evaluate_dataframe(x_train_df)
    lasso_cross_validated_model.fit(x_train_df, y_train_df, iteration_number=0)

    # Test predictions.
    num_test_x = 50
    x_test_df = objective_function.parameter_space.random_dataframe(num_samples=num_test_x)
    y_test = objective_function.evaluate_dataframe(x_test_df).to_numpy().reshape(-1)

    predictions = lasso_cross_validated_model.predict(x_test_df)
    pred_df = predictions.get_dataframe()
    predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
    predicted_y = pred_df[predicted_value_col].to_numpy()

    residual_sum_of_squares = ((y_test - predicted_y) ** 2).sum()
    total_sum_of_squares = ((y_test - y_test.mean()) ** 2).sum()
    unexplained_variance = residual_sum_of_squares / total_sum_of_squares
    test_threshold = 10 ** -6
    print(f'Asserting {unexplained_variance} < {test_threshold}')
    assert unexplained_variance < test_threshold, f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})'
def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(self):
    """Tests the bayesian optimizer on a simple quadratic function, first feeding the optimizer a lot of data."""
    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    random_params_df = objective_function.parameter_space.random_dataframe(num_samples=10000)
    y_df = objective_function.evaluate_dataframe(random_params_df)

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default,
        logger=self.logger
    )
    bayesian_optimizer.register(random_params_df, y_df)

    num_guided_samples = 20
    for i in range(num_guided_samples):
        # Suggest the parameters.
        suggested_params = bayesian_optimizer.suggest()
        target_value = objective_function.evaluate_point(suggested_params)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        # Register the observation with the optimizer.
        bayesian_optimizer.register(suggested_params.to_dataframe(), target_value.to_dataframe())

    self.validate_optima(bayesian_optimizer)
    best_config_point, best_objective = bayesian_optimizer.optimum()
    self.logger.info(f"Optimum: {best_objective} Best Configuration: {best_config_point}")

    trace_output_path = os.path.join(self.temp_dir, "PreHeatedTrace.json")
    self.logger.info(f"Writing trace to {trace_output_path}")
    global_values.tracer.dump_trace_to_file(output_file_path=trace_output_path)
    global_values.tracer.clear_events()
def test_glow_worm_on_three_level_quadratic(self):
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    # Let's warm up the model a bit.
    #
    num_warmup_samples = 1000
    random_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_warmup_samples)
    y = objective_function.evaluate_dataframe(random_params_df)

    model = HomogeneousRandomForestRegressionModel(
        model_config=self.model_config,
        input_space=objective_function.parameter_space,
        output_space=output_space
    )
    model.fit(
        feature_values_pandas_frame=random_params_df,
        target_values_pandas_frame=y,
        iteration_number=num_warmup_samples
    )

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    utility_function = ConfidenceBoundUtilityFunction(
        function_config=self.utility_function_config,
        surrogate_model=model,
        minimize=optimization_problem.objectives[0].minimize
    )
    glow_worm_swarm_optimizer = GlowWormSwarmOptimizer(
        optimization_problem=optimization_problem,
        utility_function=utility_function,
        optimizer_config=glow_worm_swarm_optimizer_config_store.default
    )

    num_iterations = 5
    for i in range(num_iterations):
        suggested_params = glow_worm_swarm_optimizer.suggest()
        print(f"[{i + 1}/{num_iterations}] {suggested_params.to_json()}")
        self.assertTrue(suggested_params in objective_function.parameter_space)
def test_named_configs(self, config_name):
    objective_function_config = objective_function_config_store.get_config_by_name(config_name)
    print(objective_function_config.to_json(indent=2))
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    for _ in range(100):
        random_point = objective_function.parameter_space.random()
        value = objective_function.evaluate_point(random_point)
        assert value in objective_function.output_space

    for i in range(1, 100):
        random_dataframe = objective_function.parameter_space.random_dataframe(num_samples=i)
        values_df = objective_function.evaluate_dataframe(random_dataframe)
        assert values_df.index.equals(random_dataframe.index)
def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
    """Tests the bayesian optimizer on a simple quadratic function with no prior data."""
    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default,
        logger=self.logger
    )

    num_guided_samples = 1000
    for i in range(num_guided_samples):
        suggested_params = bayesian_optimizer.suggest()
        target_value = objective_function.evaluate_point(suggested_params)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        bayesian_optimizer.register(suggested_params.to_dataframe(), target_value.to_dataframe())
        if i > 20 and i % 20 == 0:
            best_config_point, best_objective = bayesian_optimizer.optimum()
            self.logger.info(f"[{i}/{num_guided_samples}] Optimum config: {best_config_point}, optimum objective: {best_objective}")

    self.validate_optima(bayesian_optimizer)
    best_config, optimum = bayesian_optimizer.optimum()
    assert objective_function.parameter_space.contains_point(best_config)
    assert objective_function.output_space.contains_point(optimum)

    _, all_targets = bayesian_optimizer.get_all_observations()
    assert optimum.y == all_targets.min()[0]
    self.logger.info(f"Optimum: {optimum} best configuration: {best_config}")
def setUp(self):
    self.logger = create_logger(self.__class__.__name__)

    # Start up the gRPC service.
    #
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()

    self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
    self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    self.optimization_problem = OptimizationProblem(
        parameter_space=self.objective_function.parameter_space,
        objective_space=self.objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
def test_hierarchical_quadratic_cold_start(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    num_restarts = 1000
    for restart_num in range(num_restarts):
        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
            logger=self.logger
        )

        num_guided_samples = 200
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = objective_function.evaluate_point(suggested_params)
            self.logger.info(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

            input_values_df = suggested_params.to_dataframe()
            target_values_df = y.to_dataframe()
            bayesian_optimizer.register(input_values_df, target_values_df)

        self.validate_optima(bayesian_optimizer)
        best_config_point, best_objective = bayesian_optimizer.optimum()
        self.logger.info(f"[{restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
def __init__(
    self,
    model_config: Point,
    input_space: Hypergrid = None,
    output_space: Hypergrid = None,
    logger=None
):
    assert model_config in multi_objective_pass_through_model_config_store.parameter_space
    self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=model_config.objective_function_config)

    # Note: the input_space and output_space arguments are not used; both spaces are derived
    # from the objective function named in the model config.
    MultiObjectiveRegressionModel.__init__(
        self,
        model_type=type(self),
        model_config=model_config,
        input_space=self.objective_function.default_optimization_problem.feature_space,
        output_space=self.objective_function.output_space
    )

    if logger is None:
        logger = create_logger(self.__class__.__name__)
    self.logger = logger
def test_bayesian_optimizer_with_random_near_incumbent(self):
    objective_function_config = objective_function_config_store.get_config_by_name(
        'multi_objective_waves_3_params_2_objectives_half_pi_phase_difference'
    )
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    optimization_problem = objective_function.default_optimization_problem

    optimizer_config = bayesian_optimizer_config_store.get_config_by_name('default_with_random_near_incumbent_config')
    assert optimizer_config.experiment_designer_config.numeric_optimizer_implementation == "RandomNearIncumbentOptimizer"
    optimizer_config.experiment_designer_config.fraction_random_suggestions = 0

    # Let's give it a little more resolution.
    #
    optimizer_config.experiment_designer_config.multi_objective_probability_of_improvement_config.num_monte_carlo_samples = 200

    bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
        optimization_problem=optimization_problem,
        optimizer_config=optimizer_config
    )

    random_params_df = objective_function.parameter_space.random_dataframe(num_samples=1000)
    objectives_df = objective_function.evaluate_dataframe(random_params_df)
    bayesian_optimizer.register(
        parameter_values_pandas_frame=random_params_df,
        target_values_pandas_frame=objectives_df
    )

    num_suggestions = 10
    for suggestion_number in range(num_suggestions):
        parameters = bayesian_optimizer.suggest()
        objectives = objective_function.evaluate_point(parameters)
        self.logger.info(f"[{suggestion_number}/{num_suggestions}] parameters: {parameters}, objectives: {objectives}")
        bayesian_optimizer.register(
            parameter_values_pandas_frame=parameters.to_dataframe(),
            target_values_pandas_frame=objectives.to_dataframe()
        )
def setup_method(self, method):
    self.logger = create_logger(self.__class__.__name__)

    # Start up the gRPC service. Try a bunch of ports before giving up.
    #
    max_num_tries = 100
    num_tries = 0
    for port in range(50051, 50051 + max_num_tries):
        num_tries += 1
        try:
            self.server = OptimizerServicesServer(port=port, num_threads=10)
            self.server.start()
            self.port = port
            break
        except Exception:
            self.logger.info(f"Failed to create OptimizerServicesServer on port {port}")
            if num_tries == max_num_tries:
                raise

    self.optimizer_service_channel = grpc.insecure_channel(f'localhost:{self.port}')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
    self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    self.optimization_problem = OptimizationProblem(
        parameter_space=self.objective_function.parameter_space,
        objective_space=self.objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
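# A self-contained sketch of the port-scan retry idiom used above, with a plain TCP socket
# standing in for the gRPC server so it is runnable anywhere. The helper name is hypothetical.
import socket

def find_free_port(start=50051, max_num_tries=100):
    for port in range(start, start + max_num_tries):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('localhost', port))
            return port, sock  # caller keeps the socket bound until the real server starts
        except OSError:
            sock.close()
    raise RuntimeError("No free port found.")

port, sock = find_free_port()
assert 50051 <= port < 50151
sock.close()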
def test_named_configs(self):
    named_configs = objective_function_config_store.list_named_configs()
    objective_function_configs_to_test = [named_config.config_point for named_config in named_configs]

    for objective_function_config in objective_function_configs_to_test:
        print(objective_function_config.to_json(indent=2))
        objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
        default_polynomials_domain = objective_function.parameter_space

        for _ in range(100):
            random_point = default_polynomials_domain.random()
            value = objective_function.evaluate_point(random_point)
            self.assertTrue(value in objective_function.output_space)

        for i in range(1, 100):
            random_dataframe = default_polynomials_domain.random_dataframe(num_samples=i)
            values_df = objective_function.evaluate_dataframe(random_dataframe)
            self.assertTrue(values_df.index.equals(random_dataframe.index))
def test_rerf_hierarchical_categorical_predictions(self):
    random.seed(11001)
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    rerf = RegressionEnhancedRandomForestRegressionModel(
        model_config=self.model_config,
        input_space=objective_function.parameter_space,
        output_space=objective_function.output_space
    )

    # Fit the model with the same degree as the true y.
    # The input space consists of three 2-d domains of 200 x 200 units each, so random samples below a certain
    # size produce too few points to train reliable models.
    # TODO: Good place to use a non-random training set design.
    num_train_x = 300
    x_train_df = objective_function.parameter_space.random_dataframe(num_samples=num_train_x)
    y_train_df = objective_function.evaluate_dataframe(x_train_df)
    rerf.fit(x_train_df, y_train_df)

    # Test predictions.
    predicted_value_col = Prediction.LegalColumnNames.PREDICTED_VALUE.value
    num_test_x = 50
    x_test_df = objective_function.parameter_space.random_dataframe(num_samples=num_test_x)
    y_test = objective_function.evaluate_dataframe(x_test_df).to_numpy().reshape(-1)

    predictions = rerf.predict(x_test_df)
    pred_df = predictions.get_dataframe()
    predicted_y = pred_df[predicted_value_col].to_numpy()

    residual_sum_of_squares = ((y_test - predicted_y) ** 2).sum()
    total_sum_of_squares = ((y_test - y_test.mean()) ** 2).sum()
    unexplained_variance = residual_sum_of_squares / total_sum_of_squares
    test_threshold = 10 ** -6
    print(f'Asserting {unexplained_variance} < {test_threshold}')
    assert unexplained_variance < test_threshold, f'1 - R^2 = {unexplained_variance} larger than expected ({test_threshold})'
def test_optimizer_with_random_config_random_objective(self, i):
    objective_function_config = objective_function_config_store.parameter_space.random()
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
    optimization_problem = objective_function.default_optimization_problem

    optimizer_config = bayesian_optimizer_config_store.parameter_space.random()
    optimizer_config.min_samples_required_for_guided_design_of_experiments = max(
        min(optimizer_config.min_samples_required_for_guided_design_of_experiments, 100),
        20
    )
    if optimizer_config.surrogate_model_implementation == "HomogeneousRandomForestRegressionModel":
        rf_config = optimizer_config.homogeneous_random_forest_regression_model_config
        rf_config.n_estimators = min(rf_config.n_estimators, 20)

    if optimizer_config.surrogate_model_implementation == MultiObjectiveRegressionEnhancedRandomForest.__name__:
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 25
        rerf_model_config = optimizer_config.regression_enhanced_random_forest_regression_model_config
        rerf_model_config.max_basis_function_degree = min(rerf_model_config.max_basis_function_degree, 2)
        # An increased polynomial degree requires more data to estimate the model parameters (the polynomial term coefficients).
        optimizer_config.min_samples_required_for_guided_design_of_experiments += 25 * (rerf_model_config.max_basis_function_degree - 1)
        rf_model_config = rerf_model_config.sklearn_random_forest_regression_model_config
        rf_model_config.perform_initial_random_forest_hyper_parameter_search = False
        rf_model_config.max_depth = min(rf_model_config.max_depth, 10)
        rf_model_config.n_jobs = min(rf_model_config.n_jobs, 4)

    print(f"[{i + 1}] Creating a bayesian optimizer with config: {optimizer_config} \n\n\nObjective function config: {objective_function_config}")

    bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
        optimization_problem=optimization_problem,
        optimizer_config=optimizer_config
    )
    registered_params_df, registered_objectives_df = self.optimize_objective_function(
        optimizer=bayesian_optimizer,
        objective_function=objective_function,
        num_iterations=20
    )

    # Apparently the to_json/from_json round-trip loses precision, so we explicitly lose it here so that we can do the comparison.
    #
    registered_features_json = registered_params_df.to_json(orient='index', double_precision=15)
    registered_objectives_json = registered_objectives_df.to_json(orient='index', double_precision=15)

    # Apparently the jitter is too good and we actually have to use the json strings or they will be optimized away.
    #
    assert len(registered_features_json) > 0
    assert len(registered_objectives_json) > 0

    registered_params_df = pd.read_json(registered_features_json, orient='index')
    registered_objectives_df = pd.read_json(registered_objectives_json, orient='index')

    observed_params_df, observed_objectives_df, _ = bayesian_optimizer.get_all_observations()

    numeric_params_names = [
        dimension.name
        for dimension in optimization_problem.parameter_space.dimensions
        if (isinstance(dimension, (ContinuousDimension, DiscreteDimension))
            or (isinstance(dimension, CategoricalDimension) and dimension.is_numeric))
        and (dimension.name in registered_params_df.columns)
        and (dimension.name in observed_params_df.columns)
    ]
    numeric_params_df = registered_params_df[numeric_params_names]
    observed_numeric_params_df = observed_params_df[numeric_params_names]

    assert (np.abs(numeric_params_df.fillna(0) - observed_numeric_params_df.fillna(0)) < 0.00000001).all().all()
    assert (np.abs(registered_objectives_df - observed_objectives_df) < 0.00000001).all().all()
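# Why the explicit to_json/read_json round-trip above: pandas serializes floats with limited
# precision, so the registered values are only comparable to the observed ones after both sides
# have been through the same serialization. A self-contained illustration with a toy frame:
import io
import pandas as pd

df = pd.DataFrame({'x': [1.0 / 3.0]})
json_str = df.to_json(orient='index', double_precision=15)
roundtripped_df = pd.read_json(io.StringIO(json_str), orient='index')
# Equal to within the serialized precision, but not necessarily bit-for-bit identical.
assert abs(df['x'].iloc[0] - roundtripped_df['x'].iloc[0]) < 1e-14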
def test_hyperspheres(self, minimize, num_output_dimensions, num_points):
    """Uses a hypersphere to validate that ParetoFrontier can correctly identify pareto-optimal points."""
    hypersphere_radius = 10

    objective_function_config = Point(
        implementation=Hypersphere.__name__,
        hypersphere_config=Point(
            num_objectives=num_output_dimensions,
            minimize=minimize,
            radius=hypersphere_radius
        )
    )
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    optimization_problem = objective_function.default_optimization_problem

    random_params_df = optimization_problem.parameter_space.random_dataframe(num_points)

    # Let's randomly subsample 10% of points in random_params_df and make those points pareto optimal.
    #
    optimal_points_index = random_params_df.sample(frac=0.1, replace=False, axis='index').index
    random_params_df.loc[optimal_points_index, ['radius']] = hypersphere_radius
    objectives_df = objective_function.evaluate_dataframe(dataframe=random_params_df)

    # Conveniently, we can double check all of our math by invoking Pythagoras. Basically:
    #
    #   assert y0**2 + y1**2 + ... == radius**2
    #
    assert (np.power(objectives_df, 2).sum(axis=1) - np.power(random_params_df["radius"], 2) < 0.000001).all()

    # Just a few more sanity checks before we do the pareto computation.
    #
    if minimize == "all":
        assert (objectives_df <= 0).all().all()
    elif minimize == "none":
        assert (objectives_df >= 0).all().all()
    else:
        for column, minimize_column in zip(objectives_df, objective_function.minimize_mask):
            if minimize_column:
                assert (objectives_df[column] <= 0).all()
            else:
                assert (objectives_df[column] >= 0).all()

    pareto_frontier = ParetoFrontier(
        optimization_problem=optimization_problem,
        objectives_df=objectives_df,
        parameters_df=random_params_df
    )
    pareto_df = pareto_frontier.pareto_df

    # We know that all of the pareto efficient points must be on the frontier.
    #
    assert optimal_points_index.difference(pareto_df.index.intersection(optimal_points_index)).empty
    assert len(pareto_df.index) >= len(optimal_points_index)

    # If we flip all minimized objectives, we can assert on even more things.
    #
    for column, minimize_column in zip(objectives_df, objective_function.minimize_mask):
        if minimize_column:
            objectives_df[column] = -objectives_df[column]
            pareto_df[column] = -pareto_df[column]

    non_pareto_index = objectives_df.index.difference(pareto_df.index)
    for i, row in pareto_df.iterrows():
        # Now let's make sure that no point in pareto is dominated by any non-pareto point.
        #
        assert (objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

        # Let's also make sure that no point on the pareto is dominated by any other point there.
        #
        other_rows = pareto_df.index.difference([i])
        assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
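# A self-contained sketch (toy data, both objectives maximized) of the pandas broadcasting
# idiom used in the domination checks above: comparing a DataFrame to a Series row compares
# every row element-wise against that row.
import pandas as pd

objectives = pd.DataFrame({'y0': [1.0, 3.0, 2.0], 'y1': [1.0, 3.0, 0.5]})
pareto_row = objectives.loc[1]  # (3, 3) is pareto optimal here
others = objectives.index.difference([1])
# No other point dominates the pareto row: each is strictly worse on at least one objective...
assert (objectives.loc[others] < pareto_row).any(axis=1).all()
# ...and none is strictly better on all objectives.
assert (objectives.loc[others] > pareto_row).all(axis=1).sum() == 0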
def test_pareto_frontier_volume_on_hyperspheres(self, minimize, num_dimensions):
    """Uses a known formula for the volume of hyperspheres to validate the accuracy of the pareto frontier estimate."""
    hypersphere_radius = 10
    inscribed_hypersphere_radius = 7  # For computing a lower bound on the volume.

    # In order to validate the estimates, we must know the allowable upper and lower bounds.
    # We know that the estimate should not be higher than the volume of the n-ball (a ball in n dimensions).
    # We can also come up with a lower bound, by computing the volume of a slightly smaller ball inscribed
    # in the hypersphere. Note that the volume of an n-ball can be computed recursively, so we keep track
    # of n-ball volumes in lower dimensions.
    upper_bounds_on_sphere_volume_by_num_dimensions = {}
    lower_bounds_on_sphere_volume_by_num_dimensions = {}

    # Compute the base cases for the recursion.
    #
    upper_bounds_on_sphere_volume_by_num_dimensions[2] = np.pi * (hypersphere_radius ** 2)
    upper_bounds_on_sphere_volume_by_num_dimensions[3] = (4 / 3) * np.pi * (hypersphere_radius ** 3)
    lower_bounds_on_sphere_volume_by_num_dimensions[2] = np.pi * (inscribed_hypersphere_radius ** 2)
    lower_bounds_on_sphere_volume_by_num_dimensions[3] = (4 / 3) * np.pi * (inscribed_hypersphere_radius ** 3)

    # Compute the recursive values: V_n = V_{n-2} * 2 * pi * r**2 / n.
    #
    for n in range(4, num_dimensions + 1):
        upper_bounds_on_sphere_volume_by_num_dimensions[n] = upper_bounds_on_sphere_volume_by_num_dimensions[n - 2] * 2 * np.pi * (hypersphere_radius ** 2) / n
        lower_bounds_on_sphere_volume_by_num_dimensions[n] = lower_bounds_on_sphere_volume_by_num_dimensions[n - 2] * 2 * np.pi * (inscribed_hypersphere_radius ** 2) / n

    objective_function_config = Point(
        implementation=Hypersphere.__name__,
        hypersphere_config=Point(
            num_objectives=num_dimensions,
            minimize=minimize,
            radius=hypersphere_radius
        )
    )
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
    parameter_space = objective_function.parameter_space

    # Build a meshgrid over the parameter space, pinning the radius to the hypersphere's radius.
    num_points = max(4, num_dimensions)
    linspaces = []
    for dimension in parameter_space.dimensions:
        if dimension.name == 'radius':
            linspaces.append(np.array([hypersphere_radius]))
        else:
            linspaces.append(dimension.linspace(num_points))
    meshgrids = np.meshgrid(*linspaces)
    reshaped_meshgrids = [meshgrid.reshape(-1) for meshgrid in meshgrids]

    params_df = pd.DataFrame({
        dim_name: reshaped_meshgrids[i]
        for i, dim_name in enumerate(parameter_space.dimension_names)
    })
    objectives_df = objective_function.evaluate_dataframe(params_df)

    pareto_frontier = ParetoFrontier(
        optimization_problem=objective_function.default_optimization_problem,
        objectives_df=objectives_df,
        parameters_df=params_df
    )
    print("Num points in pareto frontier: ", len(objectives_df.index))
    assert len(pareto_frontier.pareto_df.index) == len(objectives_df.index)

    pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
    ci_lower_bound, ci_upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)

    # Scale the ball-volume bounds down to a single orthant (hence the 2**num_dimensions divisor)
    # to compare against the pareto volume estimate.
    lower_bound_on_pareto_volume = lower_bounds_on_sphere_volume_by_num_dimensions[num_dimensions] / (2 ** num_dimensions)
    upper_bound_on_pareto_volume = upper_bounds_on_sphere_volume_by_num_dimensions[num_dimensions] / (2 ** num_dimensions)
    print("True bounds:", lower_bound_on_pareto_volume, upper_bound_on_pareto_volume)
    print("CI bounds: ", ci_lower_bound, ci_upper_bound)
    assert lower_bound_on_pareto_volume <= ci_lower_bound <= ci_upper_bound <= upper_bound_on_pareto_volume
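# The recursion above is V_n(r) = V_{n-2}(r) * 2 * pi * r^2 / n. A self-contained sanity
# check against the closed form V_n(r) = pi^(n/2) * r^n / Gamma(n/2 + 1):
import math

def n_ball_volume_closed_form(n, r):
    return math.pi ** (n / 2) * r ** n / math.gamma(n / 2 + 1)

r = 10
volumes = {2: math.pi * r ** 2, 3: (4 / 3) * math.pi * r ** 3}
for n in range(4, 11):
    volumes[n] = volumes[n - 2] * 2 * math.pi * r ** 2 / n
    assert abs(volumes[n] - n_ball_volume_closed_form(n, r)) < 1e-6 * n_ball_volume_closed_form(n, r)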
def test_hierarchical_quadratic_cold_start_random_configs(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    random_state = random.Random()
    num_restarts = 200
    for restart_num in range(num_restarts):
        # Let's set up random seeds so that we can easily repeat failed experiments.
        #
        random_state.seed(restart_num)
        bayesian_optimizer_config_store.parameter_space.random_state = random_state
        objective_function.parameter_space.random_state = random_state

        optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

        # The goal here is to make sure the optimizer works with a lot of different configurations,
        # so let's make sure each run is not too long.
        #
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 50
        if optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__:
            random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
            random_forest_config.n_estimators = min(random_forest_config.n_estimators, 5)
            decision_tree_config = random_forest_config.decision_tree_regression_model_config
            decision_tree_config.min_samples_to_fit = 10
            decision_tree_config.n_new_samples_before_refit = 10

        if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
            optimizer_config.experiment_designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

        self.logger.info(f"[Restart: {restart_num}/{num_restarts}] Creating a BayesianOptimizer with the following config: ")
        self.logger.info(f"Optimizer config: {optimizer_config.to_json(indent=2)}")

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config,
            logger=self.logger
        )

        num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 50
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            y = objective_function.evaluate_point(suggested_params)
            self.logger.info(f"[Restart: {restart_num}/{num_restarts}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}")

            input_values_df = suggested_params.to_dataframe()
            target_values_df = y.to_dataframe()
            bayesian_optimizer.register(input_values_df, target_values_df)

        best_config_point, best_objective = bayesian_optimizer.optimum()
        self.logger.info(f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
def test_optimizers_against_untrained_models(self, objective_function_config_name, utility_function_type_name, utility_function_optimizer_type_name):
    """Tests that the utility function optimizers throw appropriate exceptions when the utility function cannot be evaluated."""
    self.logger.info(
        f"Creating test artifacts for objective function: {objective_function_config_name}, "
        f"utility function: {utility_function_type_name}, optimizer: {utility_function_optimizer_type_name}."
    )
    model_config = homogeneous_random_forest_config_store.default
    objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    optimization_problem = objective_function.default_optimization_problem

    model = MultiObjectiveHomogeneousRandomForest(
        model_config=model_config,
        input_space=optimization_problem.feature_space,
        output_space=optimization_problem.objective_space,
        logger=self.logger
    )
    pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem)

    if utility_function_type_name == ConfidenceBoundUtilityFunction.__name__:
        utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
        utility_function = ConfidenceBoundUtilityFunction(
            function_config=utility_function_config,
            surrogate_model=model,
            minimize=optimization_problem.objectives[0].minimize,
            logger=self.logger
        )
    elif utility_function_type_name == MultiObjectiveProbabilityOfImprovementUtilityFunction.__name__:
        utility_function_config = multi_objective_probability_of_improvement_utility_function_config_store.default
        utility_function = MultiObjectiveProbabilityOfImprovementUtilityFunction(
            function_config=utility_function_config,
            pareto_frontier=pareto_frontier,
            surrogate_model=model,
            logger=self.logger
        )
    else:
        assert False, f"Unknown utility_function_type_name: {utility_function_type_name}"

    if utility_function_optimizer_type_name == RandomSearchOptimizer.__name__:
        utility_function_optimizer_config = random_search_optimizer_config_store.default
    elif utility_function_optimizer_type_name == GlowWormSwarmOptimizer.__name__:
        utility_function_optimizer_config = glow_worm_swarm_optimizer_config_store.default
    elif utility_function_optimizer_type_name == RandomNearIncumbentOptimizer.__name__:
        utility_function_optimizer_config = random_near_incumbent_optimizer_config_store.default
    else:
        assert False, f"Unknown utility_function_optimizer_type_name: {utility_function_optimizer_type_name}"

    utility_function_optimizer = UtilityFunctionOptimizerFactory.create_utility_function_optimizer(
        utility_function=utility_function,
        optimizer_type_name=utility_function_optimizer_type_name,
        optimizer_config=utility_function_optimizer_config,
        optimization_problem=optimization_problem,
        pareto_frontier=pareto_frontier,
        logger=self.logger
    )

    assert not model.trained

    self.logger.info("Asserting the optimizer is throwing appropriate exceptions.")
    num_failed_suggestions = 3
    for i in range(num_failed_suggestions):
        with pytest.raises(expected_exception=UnableToProduceGuidedSuggestionException):
            utility_function_optimizer.suggest()
        self.logger.info(f"[{i + 1}/{num_failed_suggestions}] worked.")

    # Now let's train the model a bit and make sure that we can produce the suggestions afterwards.
    #
    random_params_df = optimization_problem.parameter_space.random_dataframe(1000)
    objectives_df = objective_function.evaluate_dataframe(random_params_df)
    features_df = optimization_problem.construct_feature_dataframe(parameters_df=random_params_df)

    self.logger.info("Training the model")
    model.fit(features_df=features_df, targets_df=objectives_df, iteration_number=1000)
    assert model.trained
    self.logger.info("Model trained.")

    self.logger.info("Updating pareto.")
    pareto_frontier.update_pareto(objectives_df=objectives_df, parameters_df=random_params_df)
    self.logger.info("Pareto updated.")

    self.logger.info("Asserting suggestions work.")
    num_successful_suggestions = 3
    for i in range(num_successful_suggestions):
        suggestion = utility_function_optimizer.suggest()
        assert suggestion in optimization_problem.parameter_space
        self.logger.info(f"[{i + 1}/{num_successful_suggestions}] successfully produced suggestion: {suggestion}")

    self.logger.info(
        f"Done testing. Objective function: {objective_function_config_name}, "
        f"utility function: {utility_function_type_name}, optimizer: {utility_function_optimizer_type_name}."
    )
def test_default_config(self, objective_function_config_name):
    objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    rf_config = homogeneous_random_forest_config_store.default
    multi_objective_rf = MultiObjectiveHomogeneousRandomForest(
        model_config=rf_config,
        input_space=objective_function.parameter_space,
        output_space=objective_function.output_space,
        logger=self.logger
    )

    num_training_samples = 1000
    num_testing_samples = 100
    train_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_training_samples)
    train_objectives_df = objective_function.evaluate_dataframe(train_params_df)

    test_params_df = objective_function.parameter_space.random_dataframe(num_samples=num_testing_samples)
    test_objectives_df = objective_function.evaluate_dataframe(test_params_df)

    multi_objective_rf.fit(features_df=train_params_df, targets_df=train_objectives_df, iteration_number=num_training_samples)
    multi_objective_predictions = multi_objective_rf.predict(features_df=train_params_df, include_only_valid_rows=True)

    # TRAINING DATA
    #
    print("------------------------------------------------------------------------------------")
    print("--------------------------------------- TRAIN --------------------------------------")
    print("------------------------------------------------------------------------------------")
    training_gof = multi_objective_rf.compute_goodness_of_fit(
        features_df=train_params_df,
        targets_df=train_objectives_df,
        data_set_type=DataSetType.TRAIN
    )
    for objective_name in objective_function.output_space.dimension_names:
        print("------------------------------------------------------------------------------------")
        print(objective_name)
        print(training_gof[objective_name].to_json(indent=2))

    # TESTING DATA
    #
    print("------------------------------------------------------------------------------------")
    print("--------------------------------------- TEST ---------------------------------------")
    print("------------------------------------------------------------------------------------")
    testing_gof = multi_objective_rf.compute_goodness_of_fit(
        features_df=test_params_df,
        targets_df=test_objectives_df,
        data_set_type=DataSetType.TEST_KNOWN_RANDOM
    )
    for objective_name in objective_function.output_space.dimension_names:
        print("------------------------------------------------------------------------------------")
        print(objective_name)
        print(testing_gof[objective_name].to_json(indent=2))
def test_hierarchical_quadratic_cold_start_random_configs(self, restart_num, use_remote_optimizer):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    random_state = random.Random()
    # Let's set up random seeds so that we can easily repeat failed experiments.
    #
    random_state.seed(restart_num)
    bayesian_optimizer_config_store.parameter_space.random_state = random_state
    objective_function.parameter_space.random_state = random_state

    optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

    # We can make this test more useful as a Unit Test by restricting its duration.
    #
    optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
    if optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__:
        random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
        random_forest_config.n_estimators = min(random_forest_config.n_estimators, 5)
        decision_tree_config = random_forest_config.decision_tree_regression_model_config
        decision_tree_config.min_samples_to_fit = 10
        decision_tree_config.n_new_samples_before_refit = 10

    if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == GlowWormSwarmOptimizer.__name__:
        optimizer_config.experiment_designer_config.glow_worm_swarm_optimizer_config.num_iterations = 5

    if optimizer_config.experiment_designer_config.numeric_optimizer_implementation == RandomSearchOptimizer.__name__:
        random_search_config = optimizer_config.experiment_designer_config.random_search_optimizer_config
        random_search_config.num_samples_per_iteration = min(random_search_config.num_samples_per_iteration, 1000)

    print(f"[Restart: {restart_num}] Creating a BayesianOptimizer with the following config: ")
    print(optimizer_config.to_json(indent=2))

    if not use_remote_optimizer:
        bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config
        )
    else:
        bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config
        )

    num_guided_samples = optimizer_config.min_samples_required_for_guided_design_of_experiments + 5
    for i in range(num_guided_samples):
        suggested_params = bayesian_optimizer.suggest()
        y = objective_function.evaluate_point(suggested_params)
        print(f"[Restart: {restart_num}][Sample: {i}/{num_guided_samples}] {suggested_params}, y: {y}")

        input_values_df = pd.DataFrame({
            param_name: [param_value]
            for param_name, param_value in suggested_params
        })
        target_values_df = y.to_dataframe()
        bayesian_optimizer.register(
            parameter_values_pandas_frame=input_values_df,
            target_values_pandas_frame=target_values_df
        )

    best_config_point, best_objective = bayesian_optimizer.optimum(optimum_definition=OptimumDefinition.BEST_OBSERVATION)
    print(f"[Restart: {restart_num}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
    self.validate_optima(optimizer=bayesian_optimizer)

    if not use_remote_optimizer:
        # Test if pickling works.
        #
        pickled_optimizer = pickle.dumps(bayesian_optimizer)
        unpickled_optimizer = pickle.loads(pickled_optimizer)
        assert unpickled_optimizer.suggest() in bayesian_optimizer.optimization_problem.parameter_space
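# A self-contained sketch of the pickle round-trip check above, with a toy stateful object
# (random.Random) standing in for the optimizer:
import pickle
import random

rng = random.Random(42)
unpickled_rng = pickle.loads(pickle.dumps(rng))
# The unpickled object reproduces the original's behavior exactly.
assert unpickled_rng.random() == rng.random()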
def test_multi_objective_optimization(self, objective_function_implementation, minimize, num_output_dimensions, num_points):
    if objective_function_implementation == Hypersphere:
        hypersphere_radius = 10
        objective_function_config = Point(
            implementation=Hypersphere.__name__,
            hypersphere_config=Point(
                num_objectives=num_output_dimensions,
                minimize=minimize,
                radius=hypersphere_radius
            )
        )
    else:
        objective_function_config = Point(
            implementation=MultiObjectiveNestedPolynomialObjective.__name__,
            multi_objective_nested_polynomial_config=Point(
                num_objectives=num_output_dimensions,
                objective_function_implementation=NestedPolynomialObjective.__name__,
                nested_polynomial_objective_config=Point(
                    num_nested_polynomials=2,
                    nested_function_implementation=PolynomialObjective.__name__,
                    polynomial_objective_config=Point(
                        seed=17,
                        input_domain_dimension=2,
                        input_domain_min=-2**10,
                        input_domain_width=2**11,
                        max_degree=2,
                        include_mixed_coefficients=True,
                        percent_coefficients_zeroed=0.0,
                        coefficient_domain_min=-10.0,
                        coefficient_domain_width=9.0,
                        include_noise=False,
                        noise_coefficient_of_variation=0.0
                    )
                )
            )
        )
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
    optimization_problem = objective_function.default_optimization_problem

    if objective_function_implementation == MultiObjectiveNestedPolynomialObjective:
        # We need to modify the default optimization problem to respect the "minimize" argument.
        # Note: use a per-objective variable here so that we don't overwrite the `minimize` argument
        # on the first iteration.
        #
        objectives = []
        for i, default_objective in enumerate(optimization_problem.objectives):
            if minimize == "all":
                minimize_objective = True
            elif minimize == "some":
                minimize_objective = ((i % 2) == 0)
            else:
                minimize_objective = False
            objectives.append(Objective(name=default_objective.name, minimize=minimize_objective))
        optimization_problem.objectives = objectives

    optimizer_config = bayesian_optimizer_config_store.get_config_by_name("default_multi_objective_optimizer_config")
    self.logger.info(optimizer_config)
    optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
        optimization_problem=optimization_problem,
        optimizer_config=optimizer_config
    )
    assert optimizer.optimizer_config.surrogate_model_implementation == MultiObjectiveHomogeneousRandomForest.__name__

    # We can now go through the optimization loop, at each point validating that:
    # 1) The suggested point is valid.
    # 2) The volume of the pareto frontier is monotonically increasing.
    #
    lower_bounds_on_pareto_volume = []
    upper_bounds_on_pareto_volume = []

    for i in range(num_points):
        suggestion = optimizer.suggest()
        assert suggestion in optimization_problem.parameter_space

        objectives = objective_function.evaluate_point(suggestion)
        optimizer.register(
            parameter_values_pandas_frame=suggestion.to_dataframe(),
            target_values_pandas_frame=objectives.to_dataframe()
        )

        if i > 10:
            pareto_volume_estimator = optimizer.pareto_frontier.approximate_pareto_volume(num_samples=1000000)
            lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.95)
            lower_bounds_on_pareto_volume.append(lower_bound)
            upper_bounds_on_pareto_volume.append(upper_bound)

    pareto_volumes_over_time_df = pd.DataFrame({
        'lower_bounds': lower_bounds_on_pareto_volume,
        'upper_bounds': upper_bounds_on_pareto_volume
    })

    # If we had precise volume measurements, we would want to ascertain that the volume of the pareto
    # frontier is monotonically increasing. However, we only have estimates, so we cannot assert that
    # they are monotonic. But we can assert that they are approximately monotonic: we can make sure that
    # any dip between consecutive volumes is smaller than some small number. We can also make sure that
    # there is no drift, by looking over larger windows too.
    #
    threshold = -0.1
    for periods in [1, 10, 20]:
        min_pct_increase_in_lower_bound = pareto_volumes_over_time_df['lower_bounds'].pct_change(periods=periods).fillna(0).min()
        if not (min_pct_increase_in_lower_bound > threshold):
            print(pareto_volumes_over_time_df)
            assert min_pct_increase_in_lower_bound > threshold

        min_pct_increase_in_upper_bound = pareto_volumes_over_time_df['upper_bounds'].pct_change(periods=periods).fillna(0).min()
        if not (min_pct_increase_in_upper_bound > threshold):
            print(pareto_volumes_over_time_df)
            assert min_pct_increase_in_upper_bound > threshold
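# A self-contained illustration (toy series) of the windowed pct_change check above: dips
# smaller than 10% between estimates `periods` apart are tolerated, larger drops would fail.
import pandas as pd

estimates = pd.Series([1.00, 1.20, 1.19, 1.30, 1.45])  # small dip at index 2
threshold = -0.1
for periods in [1, 2]:
    min_pct_change = estimates.pct_change(periods=periods).fillna(0).min()
    assert min_pct_change > threshold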