def setUp(self):
    self.logger = create_logger(self.__class__.__name__)

    # Start up the gRPC service.
    #
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()
    self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
    self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

    # Define the optimization problem.
    #
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100)
        ]
    )

    output_space = SimpleHypergrid(
        name="output",
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )

    self.optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
def setUp(self):
    mlos_globals.init_mlos_global_context()
    mlos_globals.mlos_global_context.start_clock()
    self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
    self.logger.level = logging.DEBUG

    # Start up the gRPC service.
    #
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()
    self.optimizer_service_grpc_channel = grpc.insecure_channel('localhost:50051')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_grpc_channel, logger=self.logger)
    self.mlos_agent = MlosAgent(
        logger=self.logger,
        communication_channel=mlos_globals.mlos_global_context.communication_channel,
        shared_config=mlos_globals.mlos_global_context.shared_config,
        bayesian_optimizer_grpc_channel=self.optimizer_service_grpc_channel
    )
    self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
    self.mlos_agent_thread.start()

    global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

    # Let's add the allowed component types.
    self.mlos_agent.add_allowed_component_type(SmartCache)
    self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)
    self.mlos_agent.set_configuration(
        component_type=SmartCacheWorkloadGenerator,
        new_config_values=Point(
            workload_type='cyclical_key_from_range',
            cyclical_key_from_range_config=Point(
                min=0,
                range_width=2048
            )
        )
    )

    # Let's create the workload.
    self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

    self.optimizer = None
    self.working_set_size_estimator = WorkingSetSizeEstimator()
    self.hit_rate_monitor = HitRateMonitor()

    self.smart_cache_experiment = MlosExperiment(
        smart_component_types=[SmartCache],
        telemetry_aggregators=[self.working_set_size_estimator, self.hit_rate_monitor]
    )

    self.optimization_problem = OptimizationProblem(
        parameter_space=SmartCache.parameter_search_space,
        objective_space=SimpleHypergrid(
            name="objectives",
            dimensions=[ContinuousDimension(name="hit_rate", min=0, max=1)]
        ),
        objectives=[Objective(name="hit_rate", minimize=False)]
    )
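
# A minimal sketch (not part of the test) of what the 'cyclical_key_from_range'
# workload configured above presumably produces: keys drawn cyclically from
# [min, min + range_width). The function below is an assumption inferred from the
# config fields, not the actual SmartCacheWorkloadGenerator implementation.
def cyclical_keys(min_key=0, range_width=2048):
    """Yield keys min_key, min_key + 1, ..., min_key + range_width - 1, then wrap around, forever."""
    key = min_key
    while True:
        yield key
        key += 1
        if key >= min_key + range_width:
            key = min_key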
def setUp(self):
    self.logger = create_logger(self.__class__.__name__)

    # Start up the gRPC service.
    #
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()
    self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
    self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    self.optimization_problem = OptimizationProblem(
        parameter_space=self.objective_function.parameter_space,
        objective_space=self.objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
def setup_method(self, method):
    self.logger = create_logger(self.__class__.__name__)

    # Start up the gRPC service. Try a bunch of times before giving up.
    #
    max_num_tries = 100
    num_tries = 0
    for port in range(50051, 50051 + max_num_tries):
        num_tries += 1
        try:
            self.server = OptimizerMicroserviceServer(port=port, num_threads=10)
            self.server.start()
            self.port = port
            break
        except:
            self.logger.info(f"Failed to create OptimizerMicroserviceServer on port {port}")
            if num_tries == max_num_tries:
                raise

    self.optimizer_service_channel = grpc.insecure_channel(f'localhost:{self.port}')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
    self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

    self.optimization_problem = OptimizationProblem(
        parameter_space=self.objective_function.parameter_space,
        objective_space=self.objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
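
# The port-scanning loop above recurs in several fixtures. A hypothetical helper
# that factors it out might look like this; make_server and the broad exception
# handling are assumptions, since OptimizerMicroserviceServer's failure mode isn't
# shown in this excerpt.
def start_server_on_free_port(make_server, base_port=50051, max_num_tries=100, logger=None):
    """Try consecutive ports until a server starts; return (server, port)."""
    last_error = None
    for port in range(base_port, base_port + max_num_tries):
        server = make_server(port)
        try:
            server.start()
            return server, port
        except Exception as e:  # the original code uses a bare except
            last_error = e
            if logger is not None:
                logger.info(f"Failed to start server on port {port}")
    raise last_error

# Usage sketch:
#   self.server, self.port = start_server_on_free_port(
#       lambda port: OptimizerMicroserviceServer(port=port, num_threads=10),
#       logger=self.logger
#   )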
def setup_class(cls):
    """ Sets up all the singletons needed to test the BayesianOptimizer.
    """
    warnings.simplefilter("error")
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    cls.logger = create_logger(logger_name=cls.__name__)
    cls.logger.setLevel(logging.DEBUG)
    cls.port = None

    # Start up the gRPC service. Try a bunch of ports before giving up, so that several tests can run in parallel.
    #
    max_num_tries = 100
    num_tries = 0
    for port in range(50051, 50051 + max_num_tries):
        num_tries += 1
        try:
            cls.server = OptimizerMicroserviceServer(port=port, num_threads=10, logger=cls.logger)
            cls.server.start()
            cls.port = port
            break
        except:
            cls.logger.info(f"Failed to create OptimizerMicroserviceServer on port {port}.")
            if num_tries == max_num_tries:
                raise

    cls.optimizer_service_channel = grpc.insecure_channel(f'localhost:{cls.port}')
    cls.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=cls.optimizer_service_channel, logger=cls.logger)

    cls.temp_dir = os.path.join(os.getcwd(), "temp")
    if not os.path.exists(cls.temp_dir):
        os.mkdir(cls.temp_dir)
    cls.trace_output_path = os.path.join(cls.temp_dir, "TestBayesianOptimizerTrace.json")
    try:
        os.remove(cls.trace_output_path)
    except OSError:
        pass
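
# For symmetry, a teardown sketch for the fixture above. Only server.stop(grace=None)
# and channel.close() appear elsewhere in these tests; the trace-dumping call is a
# hypothetical placeholder, since the Tracer's dump API isn't shown in this excerpt.
def teardown_class(cls):
    cls.server.stop(grace=None)
    cls.optimizer_service_channel.close()
    # Hypothetical: persist the trace collected by global_values.tracer to
    # cls.trace_output_path, which setup_class prepared above, e.g.
    #   global_values.tracer.dump_trace_to_file(output_file_path=cls.trace_output_path)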
def setUpClass(cls):
    """ Sets up all the singletons needed to test the BayesianOptimizer.
    """
    warnings.simplefilter("error")
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    cls.logger = create_logger(logger_name=cls.__name__)

    # Start up the gRPC service.
    #
    cls.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    cls.server.start()
    cls.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
    cls.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=cls.optimizer_service_channel, logger=cls.logger)
def main():
    args = parse_command_line_arguments()
    server = OptimizerMicroserviceServer(port=args.port, num_threads=args.num_threads)

    def ctrl_c_handler(_, __):
        print("Received CTRL-C: shutting down.")
        if server.started:
            print("Shutting down server.")
            server.stop(grace=None)
            print("Server stopped.")

    global_values.declare_singletons()
    signal.signal(signal.SIGINT, ctrl_c_handler)
    signal.signal(signal.SIGTERM, ctrl_c_handler)

    print("Starting Optimizer Microservice ...")
    server.start()
    server.wait_for_termination()
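
# parse_command_line_arguments() isn't shown in this excerpt. A plausible sketch,
# assuming only the two attributes main() actually reads (args.port and
# args.num_threads); the flag names and defaults here are guesses, not the real CLI.
import argparse

def parse_command_line_arguments():
    parser = argparse.ArgumentParser(description="Optimizer Microservice")
    parser.add_argument('--port', type=int, default=50051, help="Port for the gRPC service to listen on.")
    parser.add_argument('--num-threads', type=int, default=10, dest='num_threads', help="Size of the gRPC server's thread pool.")
    return parser.parse_args()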
class TestBayesianOptimizerGrpcClient(unittest.TestCase):
    """ Tests the E2E Grpc Client-Service workflow.
    """

    @classmethod
    def setUpClass(cls):
        warnings.simplefilter("error")
        global_values.declare_singletons()

    def setUp(self):
        self.logger = create_logger(self.__class__.__name__)

        # Start up the gRPC service.
        #
        self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
        self.server.start()
        self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

        objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
        self.objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)

        self.optimization_problem = OptimizationProblem(
            parameter_space=self.objective_function.parameter_space,
            objective_space=self.objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)]
        )

    def tearDown(self):
        """ We need to tear down the gRPC server here.
        """
        self.server.stop(grace=None)

    def test_echo(self):
        optimizer_service_stub = OptimizerServiceStub(channel=self.optimizer_service_channel)
        response = optimizer_service_stub.Echo(Empty())
        self.assertTrue(isinstance(response, Empty))

    def test_optimizer_with_default_config(self):
        pre_existing_optimizers = {optimizer.id: optimizer for optimizer in self.optimizer_monitor.get_existing_optimizers()}
        print(bayesian_optimizer_config_store.default)
        bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=self.optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default
        )
        post_existing_optimizers = {optimizer.id: optimizer for optimizer in self.optimizer_monitor.get_existing_optimizers()}

        new_optimizers = {
            optimizer_id: optimizer
            for optimizer_id, optimizer in post_existing_optimizers.items()
            if optimizer_id not in pre_existing_optimizers
        }

        self.assertTrue(len(new_optimizers) == 1)

        new_optimizer_id = list(new_optimizers.keys())[0]
        new_optimizer = new_optimizers[new_optimizer_id]

        self.assertTrue(new_optimizer_id == bayesian_optimizer.id)
        self.assertTrue(new_optimizer.optimizer_config == bayesian_optimizer.optimizer_config)

        num_iterations = 100
        registered_features_df, registered_objectives_df = self.optimize_quadratic(optimizer=bayesian_optimizer, num_iterations=num_iterations)

        # Apparently to_json()/read_json() lose precision, so we explicitly lose it here too, so that the comparison below is fair.
        #
        registered_features_json = registered_features_df.to_json(orient='index', double_precision=15)
        registered_objectives_json = registered_objectives_df.to_json(orient='index', double_precision=15)

        # Apparently the jitter is too good and we actually have to use the json strings, or they will be optimized away.
        #
        assert len(registered_features_json) > 0
        assert len(registered_objectives_json) > 0

        registered_features_df = pd.read_json(registered_features_json, orient='index')
        registered_objectives_df = pd.read_json(registered_objectives_json, orient='index')

        observed_features_df, observed_objectives_df = bayesian_optimizer.get_all_observations()

        self.assertTrue((np.abs(registered_features_df - observed_features_df) < 0.00000001).all().all())
        self.assertTrue((np.abs(registered_objectives_df - observed_objectives_df) < 0.00000001).all().all())

        # Let's look at the goodness of fit.
        #
        random_forest_gof_metrics = bayesian_optimizer.compute_surrogate_model_goodness_of_fit()

        # The model might not have used all of the samples, but it should have used a majority of them (we expect about 90%). 70% is a good sanity
        # check and should keep this test from being flaky.
        self.assertTrue(random_forest_gof_metrics.last_refit_iteration_number > 0.7 * num_iterations)

        # The invariants below should hold for all surrogate models: the random forest, and all of its constituent decision trees. So let's iterate over them all.
        models_gof_metrics = [random_forest_gof_metrics]
        for model_gof_metrics in models_gof_metrics:
            self.assertTrue(0 <= model_gof_metrics.relative_absolute_error <= 1)  # This could fail if the models are really wrong. Not expected in this unit test though.
            self.assertTrue(0 <= model_gof_metrics.relative_squared_error <= 1)

            # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and the number of observations (n).
            # Let's assert it (a numeric demonstration appears after this class):
            #
            #   MAE <= RMSE <= sqrt(n) * MAE
            #
            n = model_gof_metrics.last_refit_iteration_number
            self.assertTrue(model_gof_metrics.mean_absolute_error <= model_gof_metrics.root_mean_squared_error <= math.sqrt(n) * model_gof_metrics.mean_absolute_error)

            # We know that the sample confidence interval is wider than (or equal to) the prediction interval, so the hit rates should be ordered accordingly.
            self.assertTrue(model_gof_metrics.sample_90_ci_hit_rate >= model_gof_metrics.prediction_90_ci_hit_rate)
            self.assertTrue(0 <= model_gof_metrics.coefficient_of_determination <= 1)

    def test_optimizer_with_random_config(self):
        num_random_restarts = 10
        for i in range(num_random_restarts):
            optimizer_config = bayesian_optimizer_config_store.parameter_space.random()

            # Clamp the config so that the test finishes in a reasonable amount of time.
            optimizer_config.min_samples_required_for_guided_design_of_experiments = min(optimizer_config.min_samples_required_for_guided_design_of_experiments, 100)
            if optimizer_config.surrogate_model_implementation == "HomogeneousRandomForestRegressionModel":
                rf_config = optimizer_config.homogeneous_random_forest_regression_model_config
                rf_config.n_estimators = min(rf_config.n_estimators, 20)

            print(f"[{i+1}/{num_random_restarts}] Creating a bayesian optimizer with config: {optimizer_config}")

            bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=self.optimization_problem,
                optimizer_config=optimizer_config
            )
            registered_features_df, registered_objectives_df = self.optimize_quadratic(optimizer=bayesian_optimizer, num_iterations=12)

            # Apparently to_json()/read_json() lose precision, so we explicitly lose it here too, so that the comparison below is fair.
            #
            registered_features_json = registered_features_df.to_json(orient='index', double_precision=15)
            registered_objectives_json = registered_objectives_df.to_json(orient='index', double_precision=15)

            # Apparently the jitter is too good and we actually have to use the json strings, or they will be optimized away.
            #
            assert len(registered_features_json) > 0
            assert len(registered_objectives_json) > 0

            registered_features_df = pd.read_json(registered_features_json, orient='index')
            registered_objectives_df = pd.read_json(registered_objectives_json, orient='index')

            observed_features_df, observed_objectives_df = bayesian_optimizer.get_all_observations()

            self.assertTrue((np.abs(registered_features_df - observed_features_df) < 0.00000001).all().all())
            self.assertTrue((np.abs(registered_objectives_df - observed_objectives_df) < 0.00000001).all().all())

    @unittest.skip(reason="Not implemented yet.")
    def test_optimizer_with_named_config(self):
        ...
    def optimize_quadratic(self, optimizer, num_iterations):
        registered_features_df = None
        registered_objectives_df = None
        old_optimum = np.inf
        for i in range(num_iterations):
            suggested_params = optimizer.suggest()
            suggested_params_df = suggested_params.to_dataframe()

            y = self.objective_function.evaluate_point(suggested_params)
            optimizer.register(suggested_params_df, y.to_dataframe())

            # DataFrame.append() was removed in pandas 2.0; pd.concat() is the portable spelling.
            if registered_features_df is None:
                registered_features_df = suggested_params_df
            else:
                registered_features_df = pd.concat([registered_features_df, suggested_params_df], ignore_index=True)

            if registered_objectives_df is None:
                registered_objectives_df = y.to_dataframe()
            else:
                registered_objectives_df = pd.concat([registered_objectives_df, y.to_dataframe()], ignore_index=True)

            best_params, optimum = optimizer.optimum()
            # Ensure the current optimum doesn't get worse.
            assert optimum.y <= old_optimum
            old_optimum = optimum.y
            print(f"[{i+1}/{num_iterations}] Best Params: {best_params}, Best Value: {optimum.y}")
        return registered_features_df, registered_objectives_df
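
# A quick numeric check (not part of the test suite) of the MAE/RMSE invariant the
# tests above assert. For residuals e_1..e_n: MAE <= RMSE follows from Jensen's
# inequality, and RMSE <= sqrt(n) * MAE because sum(e_i^2) <= (sum |e_i|)^2.
import math
import numpy as np

residuals = np.array([0.5, -1.2, 2.0, -0.3, 0.9])
n = len(residuals)
mae = np.abs(residuals).mean()
rmse = math.sqrt((residuals ** 2).mean())
assert mae <= rmse <= math.sqrt(n) * mae
print(f"MAE={mae:.3f} <= RMSE={rmse:.3f} <= sqrt(n)*MAE={math.sqrt(n) * mae:.3f}")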
class TestSmartCacheWithRemoteOptimizer:
    """ Tests SmartCache that's being tuned by the remote optimizer.

    This test will:
    1. Instantiate a SmartCache.
    2. Create an MlosExperiment that connects to a remote or in-process optimizer.
    3. Optimize the SmartCache with the help of the remote or in-process optimizer.
    """

    def setup_method(self, method):
        mlos_globals.init_mlos_global_context()
        mlos_globals.mlos_global_context.start_clock()
        self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
        self.logger.level = logging.DEBUG

        # Start up the gRPC service. Try a bunch of times before giving up.
        #
        max_num_tries = 100
        num_tries = 0
        for port in range(50051, 50051 + max_num_tries):
            num_tries += 1
            try:
                self.server = OptimizerMicroserviceServer(port=port, num_threads=10)
                self.server.start()
                self.port = port
                break
            except:
                self.logger.info(f"Failed to create OptimizerMicroserviceServer on port {port}")
                if num_tries == max_num_tries:
                    raise

        self.optimizer_service_channel = grpc.insecure_channel(f'localhost:{self.port}')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.mlos_agent = MlosAgent(
            logger=self.logger,
            communication_channel=mlos_globals.mlos_global_context.communication_channel,
            shared_config=mlos_globals.mlos_global_context.shared_config,
            bayesian_optimizer_grpc_channel=self.optimizer_service_channel
        )
        self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
        self.mlos_agent_thread.start()

        global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

        # Let's add the allowed component types.
        self.mlos_agent.add_allowed_component_type(SmartCache)
        self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)
        self.mlos_agent.set_configuration(
            component_type=SmartCacheWorkloadGenerator,
            new_config_values=Point(
                workload_type='cyclical_key_from_range',
                cyclical_key_from_range_config=Point(min=0, range_width=2048)
            )
        )

        # Let's create the workload.
        self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

        self.optimizer = None
        self.working_set_size_estimator = WorkingSetSizeEstimator()
        self.hit_rate_monitor = HitRateMonitor()

        self.smart_cache_experiment = MlosExperiment(
            smart_component_types=[SmartCache],
            telemetry_aggregators=[self.working_set_size_estimator, self.hit_rate_monitor]
        )

        self.optimization_problem = OptimizationProblem(
            parameter_space=SmartCache.parameter_search_space,
            objective_space=SimpleHypergrid(
                name="objectives",
                dimensions=[ContinuousDimension(name="hit_rate", min=0, max=1)]
            ),
            objectives=[Objective(name="hit_rate", minimize=False)]
        )

    def teardown_method(self, method):
        mlos_globals.mlos_global_context.stop_clock()
        self.mlos_agent.stop_all()
        self.server.stop(grace=None).wait(timeout=1)
        self.server.wait_for_termination(timeout=1)
        self.optimizer_service_channel.close()

    def test_smart_cache_with_remote_optimizer_on_a_timer(self):
        """ Periodically invokes the optimizer to improve cache performance.
""" optimizer_config = bayesian_optimizer_config_store.default optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 5 self.optimizer = self.bayesian_optimizer_factory.create_remote_optimizer( optimization_problem=self.optimization_problem, optimizer_config=optimizer_config) self.mlos_agent.start_experiment(self.smart_cache_experiment) num_iterations = 101 for i in range(num_iterations): smart_cache_workload_thread = Thread( target=self.smart_cache_workload.run, args=(0.1, )) smart_cache_workload_thread.start() smart_cache_workload_thread.join() current_cache_config = self.mlos_agent.get_configuration( component_type=SmartCache) features_df = current_cache_config.to_dataframe() hit_rate = self.hit_rate_monitor.get_hit_rate() num_requests = self.hit_rate_monitor.num_requests working_set_size_estimate = self.working_set_size_estimator.estimate_working_set_size( ) objectives_df = pd.DataFrame({'hit_rate': [hit_rate]}) self.optimizer.register(features_df, objectives_df) new_config_values = self.optimizer.suggest() self.mlos_agent.set_configuration( component_type=SmartCache, new_config_values=new_config_values) self.hit_rate_monitor.reset() self.logger.info( f"Previous config: {current_cache_config.to_json()}") self.logger.info( f"Estimated working set size: {working_set_size_estimate.chapman_estimator}. Hit rate: {hit_rate:.2f}. Num requests: {num_requests} " ) self.mlos_agent.stop_experiment(self.smart_cache_experiment) # Let's look at the goodness of fit. # multi_objective_gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit( ) for objective_name, random_forest_gof_metrics in multi_objective_gof_metrics: # The model might not have used all of the samples, but should have used a majority of them (I expect about 90%), but 70% is a good sanity check # and should make this test not very flaky. assert random_forest_gof_metrics.last_refit_iteration_number > 0.5 * num_iterations # Those relative errors should generally be between 0 and 1 unless the model's predictions are worse than predicting average... # This unit tests occasionally doesn't have enough data to get us down to 1 so we'll pass the test if its less than 2. # Note, the point of this test is to check sanity. We'll use a separate suite to evaluate models' performance from an ML standpoint. self.logger.info( f"Relative absolute error: {random_forest_gof_metrics.relative_absolute_error}" ) self.logger.info( f"Relative squared error: {random_forest_gof_metrics.relative_squared_error}" ) assert random_forest_gof_metrics.relative_absolute_error is None or ( 0 <= random_forest_gof_metrics.relative_absolute_error <= 2) assert random_forest_gof_metrics.relative_squared_error is None or ( 0 <= random_forest_gof_metrics.relative_squared_error <= 2) # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and number of observations (n) let's assert it. n = random_forest_gof_metrics.last_refit_iteration_number self.logger.info(f"Last refit iteration number: {n}") self.logger.info( f"Mean absolute error: {random_forest_gof_metrics.mean_absolute_error}" ) self.logger.info( f"Root mean squared error: {random_forest_gof_metrics.root_mean_squared_error}" ) assert random_forest_gof_metrics.mean_absolute_error <= random_forest_gof_metrics.root_mean_squared_error <= math.sqrt( n) * random_forest_gof_metrics.mean_absolute_error # We know that the sample confidence interval is wider (or equal to) prediction interval. 
So hit rates should be ordered accordingly. assert random_forest_gof_metrics.sample_90_ci_hit_rate >= random_forest_gof_metrics.prediction_90_ci_hit_rate
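
# The test above logs working_set_size_estimate.chapman_estimator. For reference, a
# minimal sketch of the Chapman capture-recapture estimator that WorkingSetSizeEstimator
# presumably uses (its internals aren't shown in this excerpt): given n1 distinct keys
# seen in a first sample, n2 in a second, and m seen in both, the estimated population
# (working set) size is (n1 + 1) * (n2 + 1) / (m + 1) - 1.
def chapman_estimate(n1, n2, m):
    return (n1 + 1) * (n2 + 1) / (m + 1) - 1

# E.g., two samples of 100 distinct keys sharing 20 keys suggest a working set of
# roughly 485 keys.
assert round(chapman_estimate(100, 100, 20)) == 485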
class TestBayesianOptimizerGrpcClient(unittest.TestCase):
    """ Tests the E2E Grpc Client-Service workflow.
    """

    @classmethod
    def setUpClass(cls):
        warnings.simplefilter("error")
        global_values.declare_singletons()

    def setUp(self):
        self.logger = create_logger(self.__class__.__name__)

        # Start up the gRPC service.
        #
        self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
        self.server.start()
        self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

        # Define the optimization problem.
        #
        input_space = SimpleHypergrid(
            name="input",
            dimensions=[
                ContinuousDimension(name='x_1', min=-100, max=100),
                ContinuousDimension(name='x_2', min=-100, max=100)
            ]
        )

        output_space = SimpleHypergrid(
            name="output",
            dimensions=[
                ContinuousDimension(name='y', min=-math.inf, max=math.inf)
            ]
        )

        self.optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)]
        )

    def tearDown(self):
        """ We need to tear down the gRPC server here.
        """
        self.server.stop(grace=None)

    def test_optimizer_with_default_config(self):
        pre_existing_optimizers = {optimizer.id: optimizer for optimizer in self.optimizer_monitor.get_existing_optimizers()}
        bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=self.optimization_problem,
            optimizer_config=BayesianOptimizerConfig.DEFAULT
        )
        post_existing_optimizers = {optimizer.id: optimizer for optimizer in self.optimizer_monitor.get_existing_optimizers()}

        new_optimizers = {
            optimizer_id: optimizer
            for optimizer_id, optimizer in post_existing_optimizers.items()
            if optimizer_id not in pre_existing_optimizers
        }

        self.assertTrue(len(new_optimizers) == 1)

        new_optimizer_id = list(new_optimizers.keys())[0]
        new_optimizer = new_optimizers[new_optimizer_id]

        self.assertTrue(new_optimizer_id == bayesian_optimizer.id)
        self.assertTrue(new_optimizer.optimizer_config == bayesian_optimizer.optimizer_config)

        num_iterations = 100
        self.optimize_quadratic(optimizer=bayesian_optimizer, num_iterations=num_iterations)

        convergence_state = bayesian_optimizer.get_optimizer_convergence_state()

        # Now let's make sure that the convergence state looks reasonable.
        #
        random_forest_fit_state = convergence_state.surrogate_model_fit_state

        # Let's look at the goodness of fit.
        #
        random_forest_gof_metrics = random_forest_fit_state.current_train_gof_metrics

        # The model might not have used all of the samples, but it should have used a majority of them (we expect about 90%). 70% is a good sanity
        # check and should keep this test from being flaky.
        self.assertTrue(random_forest_gof_metrics.last_refit_iteration_number > 0.7 * num_iterations)

        # The invariants below should hold for all surrogate models: the random forest, and all of its constituent decision trees. So let's iterate over them all.
        models_gof_metrics = [random_forest_gof_metrics]
        for decision_tree_fit_state in random_forest_fit_state.decision_trees_fit_states:
            models_gof_metrics.append(decision_tree_fit_state.current_train_gof_metrics)

        for model_gof_metrics in models_gof_metrics:
            self.assertTrue(0 <= model_gof_metrics.relative_absolute_error <= 1)  # This could fail if the models are really wrong. Not expected in this unit test though.
            self.assertTrue(0 <= model_gof_metrics.relative_squared_error <= 1)

            # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and the number of observations (n). Let's assert it:
            #
            #   MAE <= RMSE <= sqrt(n) * MAE
            #
            n = model_gof_metrics.last_refit_iteration_number
            self.assertTrue(model_gof_metrics.mean_absolute_error <= model_gof_metrics.root_mean_squared_error <= math.sqrt(n) * model_gof_metrics.mean_absolute_error)

            # We know that the sample confidence interval is wider than (or equal to) the prediction interval, so the hit rates should be ordered accordingly.
            self.assertTrue(model_gof_metrics.sample_90_ci_hit_rate >= model_gof_metrics.prediction_90_ci_hit_rate)
            self.assertTrue(0 <= model_gof_metrics.coefficient_of_determination <= 1)

    def test_optimizer_with_random_config(self):
        num_random_restarts = 10
        for i in range(num_random_restarts):
            optimizer_config = BayesianOptimizerConfig.CONFIG_SPACE.random()
            print(f"[{i+1}/{num_random_restarts}] Creating a bayesian optimizer with config: {optimizer_config.to_dict()}")
            bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=self.optimization_problem,
                optimizer_config=optimizer_config
            )
            self.optimize_quadratic(optimizer=bayesian_optimizer, num_iterations=12)

    @unittest.skip(reason="Not implemented yet.")
    def test_optimizer_with_named_config(self):
        ...

    def optimize_quadratic(self, optimizer, num_iterations):
        for _ in range(num_iterations):
            params = optimizer.suggest()
            params_dict = params.to_dict()
            features_df = pd.DataFrame(params_dict, index=[0])

            prediction = optimizer.predict(features_df)
            prediction_df = prediction.get_dataframe()

            y = quadratic(**params_dict)
            print(f"Params: {params}, Actual: {y}, Prediction: {str(prediction_df)}")

            objectives_df = pd.DataFrame({'y': [y]})
            optimizer.register(features_df, objectives_df)
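
# quadratic() is referenced by optimize_quadratic() above but not defined in this
# excerpt. A plausible sketch, assuming the same 2D concave-up quadratic the other
# tests name ('2d_quadratic_concave_up') over inputs x_1 and x_2; the exact
# coefficients are a guess.
def quadratic(x_1, x_2):
    return x_1 ** 2 + x_2 ** 2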