class SmartCacheWorkloadLauncher:
    """Prepares the mlos infrastructure and launches SmartCacheWorkload.

    Parameters
    ----------
    logger : Logger

    Attributes
    ----------
    mlos_agent : MlosAgent
    """

    def __init__(self, logger):
        mlos_globals.init_mlos_global_context()
        self.mlos_agent = MlosAgent(
            logger=logger,
            communication_channel=mlos_globals.mlos_global_context.communication_channel,
            shared_config=mlos_globals.mlos_global_context.shared_config,
        )
        self._mlos_agent_thread = Thread(target=self.mlos_agent.run)
        self._mlos_agent_thread.start()

        self.mlos_agent.add_allowed_component_type(SmartCache)
        self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)

        self._smart_cache_workload = SmartCacheWorkloadGenerator(logger=logger)
        self._smart_cache_workload_thread = None

    def start_workload(self, duration_s=1, block=True):
        """Runs the workload generator on a background thread, optionally blocking until it finishes."""
        self._smart_cache_workload_thread = Thread(target=self._smart_cache_workload.run, args=(duration_s,))
        self._smart_cache_workload_thread.start()
        if block:
            self._smart_cache_workload_thread.join()
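# Hedged usage sketch (not part of the original file): how a driver script might use
# SmartCacheWorkloadLauncher. `create_logger` is the helper used by the tests in this
# section; the logger name and the duration value are arbitrary illustrations.
#
#     logger = create_logger('SmartCacheWorkloadLauncherDemo')
#     launcher = SmartCacheWorkloadLauncher(logger=logger)
#     launcher.start_workload(duration_s=2, block=True)  # run the generator for 2s, then wait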
def setUp(self):
    mlos_globals.init_mlos_global_context()
    mlos_globals.mlos_global_context.start_clock()
    self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
    self.logger.level = logging.DEBUG

    # Start up the gRPC service.
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()
    self.optimizer_service_grpc_channel = grpc.insecure_channel('localhost:50051')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(
        grpc_channel=self.optimizer_service_grpc_channel,
        logger=self.logger
    )

    self.mlos_agent = MlosAgent(
        logger=self.logger,
        communication_channel=mlos_globals.mlos_global_context.communication_channel,
        shared_config=mlos_globals.mlos_global_context.shared_config,
        bayesian_optimizer_grpc_channel=self.optimizer_service_grpc_channel
    )
    self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
    self.mlos_agent_thread.start()

    global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

    # Let's add the allowed component types
    self.mlos_agent.add_allowed_component_type(SmartCache)
    self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)
    self.mlos_agent.set_configuration(
        component_type=SmartCacheWorkloadGenerator,
        new_config_values=Point(
            workload_type='cyclical_key_from_range',
            cyclical_key_from_range_config=Point(
                min=0,
                range_width=2048
            )
        )
    )

    # Let's create the workload
    self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

    self.optimizer = None
    self.working_set_size_estimator = WorkingSetSizeEstimator()
    self.hit_rate_monitor = HitRateMonitor()

    self.smart_cache_experiment = MlosExperiment(
        smart_component_types=[SmartCache],
        telemetry_aggregators=[self.working_set_size_estimator, self.hit_rate_monitor]
    )

    self.optimization_problem = OptimizationProblem(
        parameter_space=SmartCache.parameter_search_space,
        objective_space=SimpleHypergrid(
            name="objectives",
            dimensions=[ContinuousDimension(name="hit_rate", min=0, max=1)]
        ),
        objectives=[Objective(name="hit_rate", minimize=False)]
    )
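# Optional hardening sketch (an assumption, not in the original setUp): the port is
# hard-coded here, so a bind failure would only surface later as an opaque RPC error.
# One way to fail fast is to block until the channel is actually ready;
# grpc.channel_ready_future is part of the public grpcio API.
#
#     grpc.channel_ready_future(self.optimizer_service_grpc_channel).result(timeout=10)
#     # raises grpc.FutureTimeoutError if the optimizer service never comes up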
@classmethod
def setUpClass(cls) -> None:
    mlos_globals.init_mlos_global_context()
    cls.logger = create_logger('TestE2EScenarios')
    cls.logger.level = logging.INFO

    cls.mlos_agent = MlosAgent(
        logger=cls.logger,
        communication_channel=mlos_globals.mlos_global_context.communication_channel,
        shared_config=mlos_globals.mlos_global_context.shared_config
    )
    cls.mlos_agent_thread = Thread(target=cls.mlos_agent.run)
    cls.mlos_agent_thread.start()
    mlos_globals.mlos_global_context.start_clock()

    cls.mlos_agent.add_allowed_component_type(SmartCache)
    cls.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)
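# A plausible counterpart to the setUpClass above (a sketch; the original excerpt does
# not include it): tear down the class-level infrastructure. The calls mirror the
# tearDown methods used elsewhere in this section (stop_clock on the global context,
# stop_all on the agent).
@classmethod
def tearDownClass(cls) -> None:
    mlos_globals.mlos_global_context.stop_clock()
    cls.mlos_agent.stop_all()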
class TestSmartCacheWithRemoteOptimizer:
    """Tests SmartCache that's being tuned by the remote optimizer.

    This test will:
    1. Instantiate a SmartCache.
    2. Create an MlosExperiment that connects to a remote or in-process optimizer.
    3. Optimize the SmartCache with the help of the remote or in-process optimizer.
    """

    def setup_method(self, method):
        mlos_globals.init_mlos_global_context()
        mlos_globals.mlos_global_context.start_clock()
        self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
        self.logger.level = logging.DEBUG

        # Start up the gRPC service. Try a bunch of ports before giving up.
        max_num_tries = 100
        num_tries = 0
        for port in range(50051, 50051 + max_num_tries):
            num_tries += 1
            try:
                self.server = OptimizerServicesServer(port=port, num_threads=10)
                self.server.start()
                self.port = port
                break
            except Exception:
                self.logger.info(f"Failed to create OptimizerServicesServer on port {port}")
                if num_tries == max_num_tries:
                    raise

        self.optimizer_service_channel = grpc.insecure_channel(f'localhost:{self.port}')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(
            grpc_channel=self.optimizer_service_channel,
            logger=self.logger
        )

        self.mlos_agent = MlosAgent(
            logger=self.logger,
            communication_channel=mlos_globals.mlos_global_context.communication_channel,
            shared_config=mlos_globals.mlos_global_context.shared_config,
            bayesian_optimizer_grpc_channel=self.optimizer_service_channel
        )
        self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
        self.mlos_agent_thread.start()

        global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

        # Let's add the allowed component types
        self.mlos_agent.add_allowed_component_type(SmartCache)
        self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)
        self.mlos_agent.set_configuration(
            component_type=SmartCacheWorkloadGenerator,
            new_config_values=Point(
                workload_type='cyclical_key_from_range',
                cyclical_key_from_range_config=Point(min=0, range_width=2048)
            )
        )

        # Let's create the workload
        self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

        self.optimizer = None
        self.working_set_size_estimator = WorkingSetSizeEstimator()
        self.hit_rate_monitor = HitRateMonitor()

        self.smart_cache_experiment = MlosExperiment(
            smart_component_types=[SmartCache],
            telemetry_aggregators=[self.working_set_size_estimator, self.hit_rate_monitor]
        )

        self.optimization_problem = OptimizationProblem(
            parameter_space=SmartCache.parameter_search_space,
            objective_space=SimpleHypergrid(
                name="objectives",
                dimensions=[ContinuousDimension(name="hit_rate", min=0, max=1)]
            ),
            objectives=[Objective(name="hit_rate", minimize=False)]
        )

    def teardown_method(self, method):
        mlos_globals.mlos_global_context.stop_clock()
        self.mlos_agent.stop_all()
        self.server.stop(grace=None).wait(timeout=1)
        self.server.wait_for_termination(timeout=1)
        self.optimizer_service_channel.close()

    def test_smart_cache_with_remote_optimizer_on_a_timer(self):
        """
        Periodically invokes the optimizer to improve cache performance.
""" optimizer_config = bayesian_optimizer_config_store.default optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 5 self.optimizer = self.bayesian_optimizer_factory.create_remote_optimizer( optimization_problem=self.optimization_problem, optimizer_config=optimizer_config) self.mlos_agent.start_experiment(self.smart_cache_experiment) num_iterations = 101 for i in range(num_iterations): smart_cache_workload_thread = Thread( target=self.smart_cache_workload.run, args=(0.1, )) smart_cache_workload_thread.start() smart_cache_workload_thread.join() current_cache_config = self.mlos_agent.get_configuration( component_type=SmartCache) features_df = current_cache_config.to_dataframe() hit_rate = self.hit_rate_monitor.get_hit_rate() num_requests = self.hit_rate_monitor.num_requests working_set_size_estimate = self.working_set_size_estimator.estimate_working_set_size( ) objectives_df = pd.DataFrame({'hit_rate': [hit_rate]}) self.optimizer.register(features_df, objectives_df) new_config_values = self.optimizer.suggest() self.mlos_agent.set_configuration( component_type=SmartCache, new_config_values=new_config_values) self.hit_rate_monitor.reset() self.logger.info( f"Previous config: {current_cache_config.to_json()}") self.logger.info( f"Estimated working set size: {working_set_size_estimate.chapman_estimator}. Hit rate: {hit_rate:.2f}. Num requests: {num_requests} " ) self.mlos_agent.stop_experiment(self.smart_cache_experiment) # Let's look at the goodness of fit. # multi_objective_gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit( ) for objective_name, random_forest_gof_metrics in multi_objective_gof_metrics: # The model might not have used all of the samples, but should have used a majority of them (I expect about 90%), but 70% is a good sanity check # and should make this test not very flaky. assert random_forest_gof_metrics.last_refit_iteration_number > 0.5 * num_iterations # Those relative errors should generally be between 0 and 1 unless the model's predictions are worse than predicting average... # This unit tests occasionally doesn't have enough data to get us down to 1 so we'll pass the test if its less than 2. # Note, the point of this test is to check sanity. We'll use a separate suite to evaluate models' performance from an ML standpoint. self.logger.info( f"Relative absolute error: {random_forest_gof_metrics.relative_absolute_error}" ) self.logger.info( f"Relative squared error: {random_forest_gof_metrics.relative_squared_error}" ) assert random_forest_gof_metrics.relative_absolute_error is None or ( 0 <= random_forest_gof_metrics.relative_absolute_error <= 2) assert random_forest_gof_metrics.relative_squared_error is None or ( 0 <= random_forest_gof_metrics.relative_squared_error <= 2) # There is an invariant linking mean absolute error (MAE), root mean squared error (RMSE) and number of observations (n) let's assert it. n = random_forest_gof_metrics.last_refit_iteration_number self.logger.info(f"Last refit iteration number: {n}") self.logger.info( f"Mean absolute error: {random_forest_gof_metrics.mean_absolute_error}" ) self.logger.info( f"Root mean squared error: {random_forest_gof_metrics.root_mean_squared_error}" ) assert random_forest_gof_metrics.mean_absolute_error <= random_forest_gof_metrics.root_mean_squared_error <= math.sqrt( n) * random_forest_gof_metrics.mean_absolute_error # We know that the sample confidence interval is wider (or equal to) prediction interval. 
            # so hit rates should be ordered accordingly.
            assert random_forest_gof_metrics.sample_90_ci_hit_rate >= random_forest_gof_metrics.prediction_90_ci_hit_rate
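# A standalone sketch (illustration only, not part of the test suite) demonstrating the
# MAE/RMSE invariant asserted above: for any n residuals, MAE <= RMSE <= sqrt(n) * MAE.
# The lower bound follows from Jensen's inequality (x^2 is convex); the upper bound from
# RMSE^2 = mean(e^2) <= (sum |e|)^2 / n = n * MAE^2. The residuals below are made up.
import math

residuals = [0.3, -1.2, 0.05, 0.7, -0.4]
n = len(residuals)
mae = sum(abs(e) for e in residuals) / n                  # mean absolute error
rmse = math.sqrt(sum(e * e for e in residuals) / n)       # root mean squared error
assert mae <= rmse <= math.sqrt(n) * mae
print(f"MAE={mae:.4f} <= RMSE={rmse:.4f} <= sqrt(n)*MAE={math.sqrt(n) * mae:.4f}")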
class TestSmartCacheWithRemoteOptimizer(unittest.TestCase):
    """Tests SmartCache that's being tuned by the remote optimizer.

    This test will:
    1. Instantiate a SmartCache.
    2. Create an MlosExperiment that connects to a remote or in-process optimizer.
    3. Optimize the SmartCache with the help of the remote or in-process optimizer.
    """

    def setUp(self):
        mlos_globals.init_mlos_global_context()
        mlos_globals.mlos_global_context.start_clock()
        self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
        self.logger.level = logging.INFO

        self.mlos_agent = MlosAgent(
            logger=self.logger,
            communication_channel=mlos_globals.mlos_global_context.communication_channel,
            shared_config=mlos_globals.mlos_global_context.shared_config,
        )
        self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
        self.mlos_agent_thread.start()

        global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

        self.workload_duration_s = 5

        # Let's add the allowed component types
        self.mlos_agent.add_allowed_component_type(SmartCache)
        self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)

        # Let's create the workload
        self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

        self.optimizer = None
        self.working_set_size_estimator = WorkingSetSizeEstimator()
        self.cache_config_timer = Timer(
            timeout_ms=200,
            observer_callback=self._set_new_cache_configuration
        )

        self.smart_cache_experiment = MlosExperiment(
            smart_component_types=[SmartCache],
            telemetry_aggregators=[self.cache_config_timer, self.working_set_size_estimator]
        )

        self.optimization_problem = OptimizationProblem(
            parameter_space=SmartCache.parameter_search_space,
            objective_space=SimpleHypergrid(
                name="objectives",
                dimensions=[ContinuousDimension(name="miss_rate", min=0, max=1)]
            ),
            context_space=None,  # TODO: add working set size estimate
            objectives=[Objective(name="miss_rate", minimize=True)]
        )

    def tearDown(self):
        mlos_globals.mlos_global_context.stop_clock()
        self.mlos_agent.stop_all()

    @unittest.skip(reason="SQL Server is not available in GCI at the moment.")
    def test_smart_cache_with_remote_optimizer_on_a_timer(self):
        """ Periodically invokes the optimizer to improve cache performance. """
        # Let's create an optimizer
        connection_string = ConnectionString.create_from_config_file(
            os.path.abspath(os.path.join(os.getcwd(), "Secrets", "local_connection_string.json"))
        )
        global_values.ml_model_services_proxy = MlosOptimizationServicesProxy(
            models_database_connection_string=connection_string
        )
        self.optimizer = DistributableSimpleBayesianOptimizer.create_remote_model(
            models_database=global_values.ml_model_services_proxy.models_database,
            optimization_problem=self.optimization_problem
        )

        TODO = """
        There are so many things wrong here that an essay is in order.

        1. The entire DistributableSimpleBayesianOptimizer is to be thrown out.
        2. We need an Optimizer API that:
            1. Will be standard across many types of optimizers.
            2. Will let us specify:
                1. The search space
                2. The context space
                3. The target values

        We should generate client libraries along with MlosModelServices for Python and C# (at least).
        I suppose that's the next task after this test is turned on.
""" self.mlos_agent.start_experiment(self.smart_cache_experiment) # Let's launch the smart_cache_workload smart_cache_workload_thread = Thread( target=self.smart_cache_workload.run, args=(self.workload_duration_s, )) smart_cache_workload_thread.start() smart_cache_workload_thread.join() self.mlos_agent.stop_experiment(self.smart_cache_experiment) def test_smart_cache_with_in_process_optimizer_on_a_timer2(self): """ Periodically invokes the optimizer to improve cache performance. """ # Let's create an optimizer self.optimizer = DistributableSimpleBayesianOptimizer( optimization_problem=self.optimization_problem) self.mlos_agent.start_experiment(self.smart_cache_experiment) # Let's launch the smart_cache_workload smart_cache_workload_thread = Thread( target=self.smart_cache_workload.run, args=(self.workload_duration_s, )) smart_cache_workload_thread.start() smart_cache_workload_thread.join() self.mlos_agent.stop_experiment(self.smart_cache_experiment) def _set_new_cache_configuration(self, elapsed_time_ms): """ This is where we would potentially query the optimizer. :param elapsed_time_ms: :return: """ new_config_values = self.optimizer.suggest() new_config_values = Point( **new_config_values ) # TODO: this Point() should not be necessary here self.mlos_agent.set_configuration(component_type=SmartCache, new_config_values=new_config_values) current_estimate = self.working_set_size_estimator.estimate_working_set_size( ) self.logger.info( f"Estimated working set size: {current_estimate.chapman_estimator}" )