def test_tracer(self):
    """Exercises the Tracer singleton end-to-end.

    Installs a fresh tracer, runs a small traced computation, and asserts
    that trace events were recorded and can be reformatted.
    """
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id='1', thread_id='0')
    try:
        # Invoked purely for its tracing side effects; the return value is irrelevant.
        self.fibonacci(n=3)
    except Exception:
        # Best effort: even if the traced call fails we still want to inspect the
        # events it produced. A bare `except:` here would also swallow
        # KeyboardInterrupt/SystemExit, so we catch Exception only.
        pass
    trace_events = global_values.tracer.trace_events
    reformatted_events = Tracer.reformat_events(trace_events)
    assert len(trace_events) > 0
    print(json.dumps(reformatted_events, indent=2))
def setUpClass(cls) -> None:
    """Declares the mlos singletons, installs a tracer for this test class, and creates the temp directory."""
    mlos.global_values.declare_singletons()
    mlos.global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    cls.temp_dir = os.path.join(os.getcwd(), "temp")
    # makedirs(exist_ok=True) avoids the check-then-create race that
    # `if not exists: mkdir` has when test classes run in parallel.
    os.makedirs(cls.temp_dir, exist_ok=True)
def setUpClass(cls):
    """Prepares shared fixtures for the BayesianOptimizer tests:
    escalates warnings to errors, wires up tracing, and builds a class logger.
    """
    # Any warning raised during the tests becomes a hard failure.
    warnings.simplefilter("error")
    global_values.declare_singletons()
    tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    global_values.tracer = tracer
    cls.logger = create_logger(logger_name=cls.__name__)
def setup_class(cls) -> None:
    """Declares the mlos singletons, installs a tracer, creates the temp directory, and builds the class logger."""
    mlos.global_values.declare_singletons()
    mlos.global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    cls.temp_dir = os.path.join(os.getcwd(), "temp")
    # makedirs(exist_ok=True) avoids the check-then-create race that
    # `if not exists: mkdir` has when test classes run in parallel.
    os.makedirs(cls.temp_dir, exist_ok=True)
    cls.logger = create_logger("TestOptimizerEvaluator")
def setup_class(cls):
    """Sets up all the objects needed to test the RandomSearchOptimizer.

    To test the RandomSearchOptimizer we need to first construct:
        * an optimization problem
        * a utility function

    To construct a utility function we need the same set up as in the
    TestConfidenceBoundUtilityFunction test.

    :return:
    """
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

    # Build the objective function the surrogate model will approximate.
    objective_function_config = objective_function_config_store.get_config_by_name('2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    cls.input_space = objective_function.parameter_space
    cls.output_space = objective_function.output_space

    # Training data: random parameter samples and their objective values.
    cls.input_values_dataframe = objective_function.parameter_space.random_dataframe(num_samples=2500)
    cls.output_values_dataframe = objective_function.evaluate_dataframe(cls.input_values_dataframe)

    cls.model_config = homogeneous_random_forest_config_store.default
    print(cls.model_config)

    # Fit the surrogate model on the random sample before any utility function uses it.
    cls.model = MultiObjectiveHomogeneousRandomForest(
        model_config=cls.model_config,
        input_space=cls.input_space,
        output_space=cls.output_space
    )
    cls.model.fit(cls.input_values_dataframe, cls.output_values_dataframe, iteration_number=len(cls.input_values_dataframe.index))

    cls.utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)

    cls.optimization_problem = OptimizationProblem(
        parameter_space=cls.input_space,
        objective_space=cls.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    cls.utility_function = ConfidenceBoundUtilityFunction(
        function_config=cls.utility_function_config,
        surrogate_model=cls.model,
        minimize=cls.optimization_problem.objectives[0].minimize
    )
def setup_class(cls):
    """Sets up all the objects needed to test the UtilityFunctionOptimizers.

    To test the UtilityFunctionOptimizers we need to first construct:
        * an objective function for the model to approximate and its corresponding parameter and output spaces
        * an optimization problem
        * a regression model, then train it on some random parameters to the objective function
        * a utility function that utilizes the model
        * a pareto frontier over the random parameters

    And only then do we get to test our utility function optimizers. This is a lot of work and a somewhat
    cleaner approach would be to simply create an instance of the BayesianOptimizer to do all of the above
    for us, but then we might not be able to test the utility function optimizers as thoroughly as we need to.

    :return:
    """
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    cls.logger = create_logger("TestUtilityFunctionOptimizers")

    # The pass-through model carries its own objective function, which the rest
    # of the fixtures are derived from.
    cls.model_config = multi_objective_pass_through_model_config_store.default
    cls.model = MultiObjectivePassThroughModelForTesting(
        model_config=cls.model_config,
        logger=cls.logger
    )
    cls.objective_function = cls.model.objective_function
    cls.parameter_space = cls.objective_function.parameter_space
    cls.objective_space = cls.objective_function.output_space
    cls.optimization_problem = cls.objective_function.default_optimization_problem

    cls.utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
    cls.utility_function = ConfidenceBoundUtilityFunction(
        function_config=cls.utility_function_config,
        surrogate_model=cls.model,
        minimize=cls.optimization_problem.objectives[0].minimize,
        logger=cls.logger
    )

    # To make the pareto frontier we have to generate some random points.
    #
    cls.parameters_df = cls.objective_function.parameter_space.random_dataframe(1000)
    cls.objectives_df = cls.objective_function.evaluate_dataframe(cls.parameters_df)
    cls.pareto_frontier = ParetoFrontier(
        optimization_problem=cls.optimization_problem,
        objectives_df=cls.objectives_df,
        parameters_df=cls.parameters_df
    )
def setup_class(cls):
    """Sets up all the singletons needed to test the BayesianOptimizer.

    Also starts a local gRPC optimizer microservice (probing a range of ports so
    several test classes can run in parallel) and prepares the temp directory.
    """
    warnings.simplefilter("error")
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    cls.logger = create_logger(logger_name=cls.__name__)
    cls.logger.setLevel(logging.DEBUG)
    cls.port = None

    # Start up the gRPC service. Try a bunch of ports, before giving up so we can do several in parallel.
    #
    max_num_tries = 100
    num_tries = 0
    for port in range(50051, 50051 + max_num_tries):
        num_tries += 1
        try:
            cls.server = OptimizerMicroserviceServer(port=port, num_threads=10, logger=cls.logger)
            cls.server.start()
            cls.port = port
            break
        except Exception:
            # Was a bare `except:` — that would also swallow KeyboardInterrupt/SystemExit
            # and keep retrying. Catch Exception only; re-raise once all ports are exhausted.
            cls.logger.info(
                f"Failed to create OptimizerMicroserviceServer on port {port}."
            )
            if num_tries == max_num_tries:
                raise

    cls.optimizer_service_channel = grpc.insecure_channel(f'localhost:{cls.port}')
    cls.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=cls.optimizer_service_channel, logger=cls.logger)

    cls.temp_dir = os.path.join(os.getcwd(), "temp")
    # makedirs(exist_ok=True) avoids the check-then-create race of `exists()` + `mkdir()`.
    os.makedirs(cls.temp_dir, exist_ok=True)
    cls.trace_output_path = os.path.join(cls.temp_dir, "TestBayesianOptimizerTrace.json")
    try:
        # Remove any stale trace from a previous run; absence is fine.
        os.remove(cls.trace_output_path)
    except OSError:
        pass
def setUpClass(cls):
    """Wires up tracing and logging, then brings up a local gRPC optimizer
    microservice on port 50051 for the BayesianOptimizer tests.
    """
    warnings.simplefilter("error")  # treat any warning as a test failure
    global_values.declare_singletons()
    tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    global_values.tracer = tracer
    cls.logger = create_logger(logger_name=cls.__name__)

    # Start up the gRPC service.
    #
    cls.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    cls.server.start()
    channel = grpc.insecure_channel('localhost:50051')
    cls.optimizer_service_channel = channel
    cls.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=channel, logger=cls.logger)
def setUpClass(cls):
    """Sets up all the objects needed to test the RandomSearchOptimizer.

    To test the RandomSearchOptimizer we need to first construct:
        * an optimization problem
        * a utility function

    To construct a utility function we need the same set up as in the
    TestConfidenceBoundUtilityFunction test.

    :return:
    """
    # Working directory for any artifacts the tests produce.
    temp_dir = os.path.join(os.getcwd(), "temp")
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)
    cls.temp_dir = temp_dir

    global_values.declare_singletons()
    tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    global_values.tracer = tracer
    cls.logger = create_logger(logger_name=cls.__name__)
def setup_class(cls) -> None:
    """Declares the mlos singletons and registers a tracer for this test class."""
    global_values.declare_singletons()
    tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    global_values.tracer = tracer
def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too-many-statements,too-many-branches
    """Runs the optimizer against the objective function and assembles an OptimizerEvaluationReport.

    Drives the suggest/evaluate/register loop for the configured number of iterations,
    periodically (every `evaluation_frequency` iterations) snapshotting goodness-of-fit
    metrics, optima, and optionally pickled optimizer state; then performs one final
    evaluation and attaches traces/pickles to the report as configured.
    """
    evaluation_report = OptimizerEvaluationReport(
        optimizer_configuration=self.optimizer_config,
        objective_function_configuration=self.objective_function_config,
        num_optimization_iterations=self.optimizer_evaluator_config.num_iterations,
        evaluation_frequency=self.optimizer_evaluator_config.evaluation_frequency
    )

    if self.optimizer_evaluator_config.include_execution_trace_in_report:
        # Make sure a tracer singleton exists and starts from a clean slate.
        mlos.global_values.declare_singletons()
        if mlos.global_values.tracer is None:
            mlos.global_values.tracer = Tracer()
        mlos.global_values.tracer.clear_events()

    if self.optimizer_evaluator_config.include_pickled_objective_function_in_report:
        # Snapshot the objective function before the optimization loop touches it.
        evaluation_report.pickled_objective_function_initial_state = pickle.dumps(self.objective_function)

    if self.optimizer_evaluator_config.include_pickled_optimizer_in_report:
        evaluation_report.pickled_optimizer_initial_state = pickle.dumps(self.optimizer)

    # One goodness-of-fit tracker per objective.
    multi_objective_regression_model_fit_state = MultiObjectiveRegressionModelFitState(objective_names=self.optimizer.optimization_problem.objective_names)
    for objective_name in self.optimizer.optimization_problem.objective_names:
        multi_objective_regression_model_fit_state[objective_name] = RegressionModelFitState()

    # Track several optimum definitions over time: best observation, predicted value,
    # and 99% upper/lower confidence bounds (alpha=0.01).
    optima_over_time = {}
    optima_over_time[OptimumDefinition.BEST_OBSERVATION.value] = OptimumOverTime(
        optimization_problem=self.optimizer.optimization_problem,
        optimum_definition=OptimumDefinition.BEST_OBSERVATION
    )

    optima_over_time[OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG.value] = OptimumOverTime(
        optimization_problem=self.optimizer.optimization_problem,
        optimum_definition=OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG
    )

    optima_over_time[f"{OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG.value}_99"] = OptimumOverTime(
        optimization_problem=self.optimizer.optimization_problem,
        optimum_definition=OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
        alpha=0.01
    )

    optima_over_time[f"{OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG.value}_99"] = OptimumOverTime(
        optimization_problem=self.optimizer.optimization_problem,
        optimum_definition=OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
        alpha=0.01
    )

    #####################################################################################################
    evaluation_report.start_time = datetime.utcnow()
    i = 0
    try:
        with traced(scope_name="optimization_loop"):
            for i in range(self.optimizer_evaluator_config.num_iterations):
                # Core suggest -> evaluate -> register cycle.
                parameters = self.optimizer.suggest()
                objectives = self.objective_function.evaluate_point(parameters)
                self.optimizer.register(parameters.to_dataframe(), objectives.to_dataframe())

                if i % self.optimizer_evaluator_config.evaluation_frequency == 0:
                    # Periodic mid-run evaluation of the optimizer's state.
                    self.logger.info(f"[{i + 1}/{self.optimizer_evaluator_config.num_iterations}]")
                    with traced(scope_name="evaluating_optimizer"):
                        if self.optimizer_evaluator_config.include_pickled_optimizer_in_report:
                            evaluation_report.add_pickled_optimizer(iteration=i, pickled_optimizer=pickle.dumps(self.optimizer))

                        if self.optimizer.trained:
                            multi_objective_gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
                            for objective_name, gof_metrics in multi_objective_gof_metrics:
                                multi_objective_regression_model_fit_state[objective_name].set_gof_metrics(
                                    data_set_type=DataSetType.TRAIN,
                                    gof_metrics=gof_metrics
                                )

                        for optimum_name, optimum_over_time in optima_over_time.items():
                            try:
                                config, value = self.optimizer.optimum(
                                    optimum_definition=optimum_over_time.optimum_definition,
                                    alpha=optimum_over_time.alpha
                                )
                                optima_over_time[optimum_name].add_optimum_at_iteration(
                                    iteration=i,
                                    optimum_config=config,
                                    optimum_value=value
                                )
                            except ValueError as e:
                                # The optimum may be undefined early on (e.g. untrained model);
                                # log and keep going.
                                self.logger.info(f"Failed to get {optimum_name} optimum.", exc_info=True)

                        if self.optimizer_evaluator_config.report_pareto_over_time:
                            # NOTE(review): this deep-copies the optimization problem into
                            # pareto_over_time — verify this is the intended snapshot
                            # (vs. e.g. the pareto frontier itself).
                            evaluation_report.pareto_over_time[i] = copy.deepcopy(self.optimizer.optimization_problem)

                        if self.optimizer_evaluator_config.report_pareto_volume_over_time:
                            volume_estimator = self.optimizer.pareto_frontier.approximate_pareto_volume()
                            ci99_on_volume = volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.01)
                            evaluation_report.pareto_volume_over_time[i] = ci99_on_volume
        evaluation_report.success = True
    except Exception as e:
        # Any failure is captured in the report rather than propagated.
        evaluation_report.success = False
        evaluation_report.exception = e
        evaluation_report.exception_traceback = traceback.format_exc()

    evaluation_report.end_time = datetime.utcnow()

    with traced(scope_name="evaluating_optimizer"):
        # Once the optimization is done, we perform a final evaluation of the optimizer.
        if self.optimizer.trained:
            multi_objective_gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
            for objective_name, gof_metrics in multi_objective_gof_metrics:
                multi_objective_regression_model_fit_state[objective_name].set_gof_metrics(data_set_type=DataSetType.TRAIN, gof_metrics=gof_metrics)

        for optimum_name, optimum_over_time in optima_over_time.items():
            try:
                config, value = self.optimizer.optimum(optimum_definition=optimum_over_time.optimum_definition, alpha=optimum_over_time.alpha)
                optima_over_time[optimum_name].add_optimum_at_iteration(
                    iteration=self.optimizer_evaluator_config.num_iterations,
                    optimum_config=config,
                    optimum_value=value
                )
            except Exception as e:
                self.logger.info(f"Failed to get {optimum_name} optimum.", exc_info=True)

        if self.optimizer_evaluator_config.report_pareto_over_time:
            evaluation_report.pareto_over_time[i] = copy.deepcopy(self.optimizer.optimization_problem)

        if self.optimizer_evaluator_config.report_pareto_volume_over_time:
            volume_estimator = self.optimizer.pareto_frontier.approximate_pareto_volume()
            ci99_on_volume = volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.01)
            evaluation_report.pareto_volume_over_time[i] = ci99_on_volume

    if self.optimizer_evaluator_config.include_execution_trace_in_report:
        # Hand the accumulated trace to the report and reset the tracer.
        evaluation_report.execution_trace = mlos.global_values.tracer.trace_events
        mlos.global_values.tracer.clear_events()

    if self.optimizer_evaluator_config.include_pickled_optimizer_in_report:
        evaluation_report.add_pickled_optimizer(iteration=i, pickled_optimizer=pickle.dumps(self.optimizer))

    if self.optimizer_evaluator_config.include_pickled_objective_function_in_report:
        evaluation_report.pickled_objective_function_final_state = pickle.dumps(self.objective_function)

    if self.optimizer_evaluator_config.report_regression_model_goodness_of_fit:
        evaluation_report.regression_model_fit_state = multi_objective_regression_model_fit_state

    if self.optimizer_evaluator_config.report_optima_over_time:
        evaluation_report.optima_over_time = optima_over_time

    return evaluation_report
def write_to_disk(self, target_folder):
    """Writes the report to disk.

    The layout on disk is as follows:
    - optimizer_config.json
    - objective_function_config.json
    - goodness_of_fit.pickle
    - objective_function_initial_state.pickle
    - objective_function_final_state.pickle
    - execution_trace.json
    - execution_info.json
    - pickled_optimizers:
        - {iteration_number}.pickle

    Only the artifacts actually present on the report are written;
    `target_folder` is assumed to already exist.
    """
    # Configurations are stored as human-readable json.
    optimizer_config_file = os.path.join(target_folder, "optimizer_config.json")
    with open(optimizer_config_file, 'w') as out_file:
        out_file.write(self.optimizer_configuration.to_json(indent=2))

    objective_function_config_file = os.path.join(target_folder, "objective_function_config.json")
    with open(objective_function_config_file, 'w') as out_file:
        out_file.write(self.objective_function_configuration.to_json(indent=2))

    # Per-iteration optimizer pickles go into their own subdirectory, one file per iteration.
    if len(self.pickled_optimizers_over_time) > 0:
        pickled_optimizers_dir = os.path.join(target_folder, "pickled_optimizers")
        if not os.path.exists(pickled_optimizers_dir):
            os.mkdir(pickled_optimizers_dir)
        for iteration, pickled_optimizer in self.pickled_optimizers_over_time.items():
            with open(os.path.join(pickled_optimizers_dir, f"{iteration}.pickle"), 'wb') as out_file:
                out_file.write(pickled_optimizer)

    # The objective-function snapshots are already pickled bytes — write them verbatim.
    if self.pickled_objective_function_initial_state is not None:
        with open(os.path.join(target_folder, "objective_function_initial_state.pickle"), "wb") as out_file:
            out_file.write(self.pickled_objective_function_initial_state)

    if self.pickled_objective_function_final_state is not None:
        with open(os.path.join(target_folder, "objective_function_final_state.pickle"), "wb") as out_file:
            out_file.write(self.pickled_objective_function_final_state)

    if self.regression_model_fit_state is not None:
        with open(os.path.join(target_folder, "regression_model_goodness_of_fit_state.pickle"), "wb") as out_file:
            pickle.dump(self.regression_model_fit_state, out_file)

    if self.optima_over_time is not None:
        with open(os.path.join(target_folder, "optima_over_time.pickle"), "wb") as out_file:
            pickle.dump(self.optima_over_time, out_file)

    if len(self.pareto_over_time) > 0:
        with open(os.path.join(target_folder, "pareto_over_time.pickle"), "wb") as out_file:
            pickle.dump(self.pareto_over_time, out_file)

    if len(self.pareto_volume_over_time) > 0:
        with open(os.path.join(target_folder, "pareto_volume_over_time.json"), "w") as out_file:
            json.dump(self.pareto_volume_over_time, out_file, indent=2)

    if self.execution_trace is not None:
        # Reuse the Tracer's own serializer to write the trace; the Tracer owns the file format.
        tracer = Tracer()
        tracer.trace_events = self.execution_trace
        tracer.dump_trace_to_file(output_file_path=os.path.join(target_folder, "execution_trace.json"))

    # Summary of the run: outcome, iteration counts, and any captured exception.
    with open(os.path.join(target_folder, "execution_info.json"), 'w') as out_file:
        execution_info_dict = {
            'success': self.success,
            'num_optimization_iterations': self.num_optimization_iterations,
            'evaluation_frequency': self.evaluation_frequency,
            'exception': str(self.exception) if self.exception is not None else None,
            'exception_stack_trace': self.exception_traceback,
            'start_time': self.start_time.strftime(self.DATETIME_FORMAT),
            'end_time': self.end_time.strftime(self.DATETIME_FORMAT)
        }
        json.dump(execution_info_dict, out_file, indent=2)
def evaluate_optimizer(self) -> OptimizerEvaluationReport: # pylint: disable=too-many-statements
    """Runs the optimizer against the objective function and assembles an OptimizerEvaluationReport.

    Drives the suggest/evaluate/register loop for the configured number of iterations,
    periodically (every `evaluation_frequency` iterations) recording goodness-of-fit
    metrics, optima, and optionally pickled optimizer state; then performs one final
    evaluation and attaches traces/pickles to the report as configured.
    """
    evaluation_report = OptimizerEvaluationReport(
        optimizer_configuration=self.optimizer_config,
        objective_function_configuration=self.objective_function_config,
        num_optimization_iterations=self.optimizer_evaluator_config.num_iterations,
        evaluation_frequency=self.optimizer_evaluator_config.evaluation_frequency,
    )

    if self.optimizer_evaluator_config.include_execution_trace_in_report:
        # Make sure a tracer singleton exists and starts from a clean slate.
        mlos.global_values.declare_singletons()
        if mlos.global_values.tracer is None:
            mlos.global_values.tracer = Tracer()
        mlos.global_values.tracer.clear_events()

    if self.optimizer_evaluator_config.include_pickled_objective_function_in_report:
        # Snapshot the objective function before the optimization loop touches it.
        evaluation_report.pickled_objective_function_initial_state = pickle.dumps(self.objective_function)

    if self.optimizer_evaluator_config.include_pickled_optimizer_in_report:
        evaluation_report.pickled_optimizer_initial_state = pickle.dumps(self.optimizer)

    regression_model_fit_state = RegressionModelFitState()

    # Track several optimum definitions over time: best observation, predicted value,
    # and 99% upper/lower confidence bounds (alpha=0.01).
    optima_over_time = {}
    optima_over_time[OptimumDefinition.BEST_OBSERVATION.value] = OptimumOverTime(
        optimization_problem=self.optimizer.optimization_problem,
        optimum_definition=OptimumDefinition.BEST_OBSERVATION
    )

    optima_over_time[OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG.value] = OptimumOverTime(
        optimization_problem=self.optimizer.optimization_problem,
        optimum_definition=OptimumDefinition.PREDICTED_VALUE_FOR_OBSERVED_CONFIG
    )

    optima_over_time[f"{OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG.value}_99"] = OptimumOverTime(
        optimization_problem=self.optimizer.optimization_problem,
        optimum_definition=OptimumDefinition.UPPER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
        alpha=0.01
    )

    optima_over_time[f"{OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG.value}_99"] = OptimumOverTime(
        optimization_problem=self.optimizer.optimization_problem,
        optimum_definition=OptimumDefinition.LOWER_CONFIDENCE_BOUND_FOR_OBSERVED_CONFIG,
        alpha=0.01
    )

    #####################################################################################################
    i = 0
    try:
        with traced(scope_name="optimization_loop"):
            for i in range(self.optimizer_evaluator_config.num_iterations):
                # Core suggest -> evaluate -> register cycle.
                parameters = self.optimizer.suggest()
                objectives = self.objective_function.evaluate_point(parameters)
                self.optimizer.register(parameters.to_dataframe(), objectives.to_dataframe())

                if i % self.optimizer_evaluator_config.evaluation_frequency == 0:
                    # Periodic mid-run evaluation of the optimizer's state.
                    print(f"[{i + 1}/{self.optimizer_evaluator_config.num_iterations}]")
                    with traced(scope_name="evaluating_optimizer"):
                        if self.optimizer_evaluator_config.include_pickled_optimizer_in_report:
                            evaluation_report.add_pickled_optimizer(iteration=i, pickled_optimizer=pickle.dumps(self.optimizer))

                        if self.optimizer.trained:
                            gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
                            regression_model_fit_state.set_gof_metrics(data_set_type=DataSetType.TRAIN, gof_metrics=gof_metrics)

                        for optimum_name, optimum_over_time in optima_over_time.items():
                            try:
                                config, value = self.optimizer.optimum(
                                    optimum_definition=optimum_over_time.optimum_definition,
                                    alpha=optimum_over_time.alpha
                                )
                                optima_over_time[optimum_name].add_optimum_at_iteration(
                                    iteration=i,
                                    optimum_config=config,
                                    optimum_value=value
                                )
                            except ValueError as e:
                                # The optimum may be undefined early on (e.g. untrained model);
                                # report and keep going.
                                print(e)
        evaluation_report.success = True
    except Exception as e:
        # Any failure is captured in the report rather than propagated.
        evaluation_report.success = False
        evaluation_report.exception = e
        evaluation_report.exception_traceback = traceback.format_exc()

    with traced(scope_name="evaluating_optimizer"):
        # NOTE(review): the string below is a no-op expression acting as a comment
        # (kept verbatim, including the "performa" typo).
        """Once the optimization is done, we performa final evaluation of the optimizer."""
        if self.optimizer.trained:
            gof_metrics = self.optimizer.compute_surrogate_model_goodness_of_fit()
            regression_model_fit_state.set_gof_metrics(data_set_type=DataSetType.TRAIN, gof_metrics=gof_metrics)

        for optimum_name, optimum_over_time in optima_over_time.items():
            try:
                config, value = self.optimizer.optimum(optimum_definition=optimum_over_time.optimum_definition, alpha=optimum_over_time.alpha)
                optima_over_time[optimum_name].add_optimum_at_iteration(
                    iteration=self.optimizer_evaluator_config.num_iterations,
                    optimum_config=config,
                    optimum_value=value
                )
            except Exception as e:
                print(e)

    if self.optimizer_evaluator_config.include_execution_trace_in_report:
        # Hand the accumulated trace to the report and reset the tracer.
        evaluation_report.execution_trace = mlos.global_values.tracer.trace_events
        mlos.global_values.tracer.clear_events()

    if self.optimizer_evaluator_config.include_pickled_optimizer_in_report:
        evaluation_report.add_pickled_optimizer(iteration=i, pickled_optimizer=pickle.dumps(self.optimizer))

    if self.optimizer_evaluator_config.include_pickled_objective_function_in_report:
        evaluation_report.pickled_objective_function_final_state = pickle.dumps(self.objective_function)

    if self.optimizer_evaluator_config.report_regression_model_goodness_of_fit:
        evaluation_report.regression_model_goodness_of_fit_state = regression_model_fit_state

    if self.optimizer_evaluator_config.report_optima_over_time:
        evaluation_report.optima_over_time = optima_over_time

    return evaluation_report