def setUpClass(cls) -> None:
    global_values.declare_singletons()
    cls.slope = 10
    cls.y_intercept = 10
    cls.input_values = np.linspace(start=0, stop=100, num=1000, endpoint=True)
    cls.output_values = cls.input_values * cls.slope + cls.y_intercept

    cls.input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name="x", min=0, max=100)]
    )
    cls.output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name="y", min=-math.inf, max=math.inf)]
    )

    cls.input_pandas_dataframe = pd.DataFrame({"x": cls.input_values})
    cls.output_pandas_dataframe = pd.DataFrame({"y": cls.output_values})

    cls.model_config = HomogeneousRandomForestRegressionModelConfig()
    cls.model = HomogeneousRandomForestRegressionModel(
        model_config=cls.model_config,
        input_space=cls.input_space,
        output_space=cls.output_space
    )
    cls.model.fit(
        cls.input_pandas_dataframe,
        cls.output_pandas_dataframe,
        iteration_number=len(cls.input_pandas_dataframe.index)
    )

    cls.sample_inputs = {'x': np.linspace(start=-10, stop=110, num=13, endpoint=True)}
    cls.sample_inputs_pandas_dataframe = pd.DataFrame(cls.sample_inputs)
    cls.sample_predictions = cls.model.predict(cls.sample_inputs_pandas_dataframe)
def setUpClass(cls) -> None:
    cls.simple_hypergrid = SimpleHypergrid(
        name='simple_adaptee',
        dimensions=[
            CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 1]),
            DiscreteDimension(name='one_to_ten', min=1, max=10),
            ContinuousDimension(name='zero_to_one', min=0, max=1),
            OrdinalDimension(name='ordinal_mixed_types', ordered_values=[1, False, 'two'])
        ]
    )

    cls.hierarchical_hypergrid = SimpleHypergrid(
        name='hierarchical_adaptee',
        dimensions=[
            CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 3]),
            DiscreteDimension(name='one_to_ten', min=1, max=10),
            ContinuousDimension(name='zero_to_one', min=0, max=1),
            OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, False, 'two'])
        ]
    ).join(
        subgrid=SimpleHypergrid(
            name="nested_grid",
            dimensions=[
                CategoricalDimension(name='categorical_mixed_types', values=['red', False, True, 3]),
                DiscreteDimension(name='one_to_ten', min=1, max=10),
                ContinuousDimension(name='zero_to_one', min=0, max=1),
                OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, 'two', False])
            ]
        ),
        on_external_dimension=CategoricalDimension(name="categorical_mixed_types", values=[True])
    )
def __init__(self, objective_function_config: Point = None):
    assert objective_function_config in enveloped_waves_config_space, \
        f"{objective_function_config} not in {enveloped_waves_config_space}"
    ObjectiveFunctionBase.__init__(self, objective_function_config)

    self._parameter_space = SimpleHypergrid(
        name="domain",
        dimensions=[
            ContinuousDimension(name=f"x_{i}", min=0, max=objective_function_config.num_periods * objective_function_config.period)
            for i in range(self.objective_function_config.num_params)
        ]
    )
    self._output_space = SimpleHypergrid(
        name="range",
        dimensions=[
            ContinuousDimension(name="y", min=-math.inf, max=math.inf)
        ]
    )

    if self.objective_function_config.envelope_type == "linear":
        self._envelope = self._linear_envelope
    elif self.objective_function_config.envelope_type == "quadratic":
        self._envelope = self._quadratic_envelope
    elif self.objective_function_config.envelope_type == "sine":
        self._envelope = self._sine_envelope
    else:
        # No envelope: scale the waves by a constant 1 everywhere.
        self._envelope = lambda x: x * 0 + 1
def test_pareto_frontier_volume_simple(self):
    """A simple sanity test on the pareto frontier volume computations."""

    # Let's generate a pareto frontier in 2D. All points lie on the line y = 1 - x.
    x = np.linspace(start=0, stop=1, num=100)
    y = 1 - x
    pareto_df = pd.DataFrame({'x': x, 'y': y})

    optimization_problem = OptimizationProblem(
        parameter_space=None,
        objective_space=SimpleHypergrid(
            name='objectives',
            dimensions=[
                ContinuousDimension(name='x', min=0, max=1),
                ContinuousDimension(name='y', min=0, max=1)
            ]
        ),
        objectives=[
            Objective(name='x', minimize=False),
            Objective(name='y', minimize=False)
        ]
    )

    pareto_frontier = ParetoFrontier(optimization_problem, pareto_df)
    pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
    lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)
    print(lower_bound, upper_bound)
    assert 0.49 < lower_bound < upper_bound < 0.51
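# A minimal sketch (plain numpy, not part of the MLOS API) of why the asserted interval
# straddles 0.5: for a frontier on y = 1 - x over the unit square, the region dominated by
# the frontier is the triangle below the line, whose area is exactly 0.5. A Monte Carlo
# estimate of that area should therefore concentrate around 0.5.
import numpy as np

rng = np.random.default_rng(seed=42)
samples = rng.uniform(low=0.0, high=1.0, size=(1_000_000, 2))
# A random point (px, py) is dominated by some frontier point iff px + py <= 1.
dominated_fraction = np.mean(samples.sum(axis=1) <= 1.0)
assert abs(dominated_fraction - 0.5) < 0.01  # ~0.5 up to sampling noise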
def setUp(self):
    # Let's create a simple quadratic response function.
    self.input_space = SimpleHypergrid(
        name="2d_X_search_domain",
        dimensions=[
            ContinuousDimension(name="x1", min=0.0, max=5.0),
            ContinuousDimension(name="x2", min=0.0, max=5.0)
        ]
    )
    self.output_space = SimpleHypergrid(
        name="degree2_polynomial",
        dimensions=[
            ContinuousDimension(name="degree2_polynomial_y", min=-10**15, max=10**15)
        ]
    )

    lasso_model_config = SklearnLassoRegressionModelConfig.DEFAULT
    rf_model_config = SklearnRandomForestRegressionModelConfig.DEFAULT
    self.model_config = RegressionEnhancedRandomForestRegressionModelConfig(
        max_basis_function_degree=2,
        min_abs_root_model_coef=0.02,
        boosting_root_model_name=SklearnLassoRegressionModelConfig.__name__,
        boosting_root_model_config=lasso_model_config,
        random_forest_model_config=rf_model_config,
        perform_initial_root_model_hyper_parameter_search=True,
        perform_initial_random_forest_hyper_parameter_search=True
    )
def setUp(self):
    # Let's create a simple linear mapping.
    self.slope = 10
    self.y_intercept = 10
    self.input_values = np.linspace(start=0, stop=100, num=1001, endpoint=True)
    self.input_output_mapping = lambda x: x * self.slope + self.y_intercept
    self.output_values = self.input_output_mapping(self.input_values)

    self.input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name="x", min=0, max=100)]
    )
    self.output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name="y", min=-math.inf, max=math.inf)]
    )

    self.input_pandas_dataframe = pd.DataFrame({"x": self.input_values})
    self.output_pandas_dataframe = pd.DataFrame({"y": self.output_values})
def setUp(self):
    self.logger = create_logger(self.__class__.__name__)

    # Start up the gRPC service.
    #
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()
    self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
    self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

    # Define the optimization problem.
    #
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100)
        ]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )
    self.optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
def test_optimum_before_register_error(self):
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name='x', min=-10, max=10)]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default
    )

    # Querying the optimum before any observations are registered should raise.
    with pytest.raises(ValueError):
        bayesian_optimizer.optimum()

    bayesian_optimizer.register(
        parameter_values_pandas_frame=pd.DataFrame({'x': [0.0]}),
        target_values_pandas_frame=pd.DataFrame({'y': [1.0]})
    )
    bayesian_optimizer.optimum()
def _build_simple_hypergrid_target(self) -> None: """ Builds a SimpleHypergrid target for a SimpleHypergrid adaptee. :return: """ self._target = SimpleHypergrid(name=self._adaptee.name, dimensions=None, random_state=self._adaptee.random_state) # Now we iterate over all dimensions and when necessary map the CategoricalDimensions to DiscreteDimensions # for adaptee_dimension in self._adaptee.dimensions: if isinstance(adaptee_dimension, DiscreteDimension): target_dimension = ContinuousDimension( name=adaptee_dimension.name, min=0, max=1, include_max=False) else: target_dimension = ContinuousDimension( name=adaptee_dimension.name, min=0, max=1, include_min=adaptee_dimension.include_min, include_max=adaptee_dimension.include_max) self._target.add_dimension(target_dimension) self._adaptee_to_target_dimension_mappings[ adaptee_dimension.name] = target_dimension self._target_to_adaptee_dimension_mappings[ target_dimension.name] = adaptee_dimension
def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
    """Tests the bayesian optimizer on a simple quadratic function with no prior data.

    :return:
    """
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100)
        ]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=BayesianOptimizerConfig.DEFAULT,
        logger=self.logger
    )

    num_guided_samples = 1000
    for i in range(num_guided_samples):
        suggested_params = bayesian_optimizer.suggest()
        suggested_params_dict = suggested_params.to_dict()
        target_value = quadratic(**suggested_params_dict)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        input_values_df = pd.DataFrame({
            param_name: [param_value]
            for param_name, param_value in suggested_params_dict.items()
        })
        target_values_df = pd.DataFrame({'y': [target_value]})

        bayesian_optimizer.register(input_values_df, target_values_df)
        if i > 20 and i % 20 == 0:
            self.logger.info(f"[{i}/{num_guided_samples}] Optimum: {bayesian_optimizer.optimum()}")

    self.logger.info(f"Optimum: {bayesian_optimizer.optimum()}")
def test_basic_functionality_on_2d_objective_space(self):
    """Basic sanity check. Mainly used to help us develop the API."""

    # Let's just create a bunch of random points, build a pareto frontier
    # and verify that the invariants hold.
    #
    parameter_space = SimpleHypergrid(
        name='params',
        dimensions=[
            ContinuousDimension(name='x1', min=0, max=10)
        ]
    )
    objective_space = SimpleHypergrid(
        name='objectives',
        dimensions=[
            ContinuousDimension(name='y1', min=0, max=10),
            ContinuousDimension(name='y2', min=0, max=10)
        ]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name='y1', minimize=False),
            Objective(name='y2', minimize=False)
        ]
    )

    num_rows = 100000
    random_objectives_df = objective_space.random_dataframe(num_rows)

    # The parameters don't correspond to the objectives, but they don't need to for this test.
    #
    random_params_df = parameter_space.random_dataframe(num_rows)

    pareto_frontier = ParetoFrontier(
        optimization_problem=optimization_problem,
        objectives_df=random_objectives_df,
        parameters_df=random_params_df
    )
    pareto_df = pareto_frontier.pareto_df
    non_pareto_index = random_objectives_df.index.difference(pareto_df.index)
    for i, row in pareto_df.iterrows():
        # Now let's make sure that no point in pareto is dominated by any non-pareto point.
        #
        assert (random_objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

        # Let's also make sure that no point on the pareto is dominated by any other point there.
        #
        other_rows = pareto_df.index.difference([i])
        assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
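# A brute-force reference implementation (plain numpy/pandas, my own sketch) of the dominance
# relation the invariants above rely on. Assumed semantics, matching the maximization
# objectives in the test: point a dominates point b iff a >= b in every objective and
# a > b in at least one.
import numpy as np
import pandas as pd

def is_dominated(point: np.ndarray, others: np.ndarray) -> bool:
    """True iff some row of `others` dominates `point` under maximization."""
    at_least_as_good = (others >= point).all(axis=1)
    strictly_better_somewhere = (others > point).any(axis=1)
    return bool((at_least_as_good & strictly_better_somewhere).any())

objectives = pd.DataFrame({'y1': [1.0, 2.0, 0.5], 'y2': [2.0, 1.0, 0.5]}).to_numpy()
assert is_dominated(objectives[2], objectives[:2])      # (0.5, 0.5) is dominated
assert not is_dominated(objectives[0], objectives[1:])  # (1, 2) is not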
def test_repeated_values(self):
    """Validates that the algorithm does its job in the presence of repeated values.

    :return:
    """
    optimization_problem = OptimizationProblem(
        parameter_space=None,
        objective_space=SimpleHypergrid(
            name="objectives",
            dimensions=[
                ContinuousDimension(name='y1', min=0, max=5),
                ContinuousDimension(name='y2', min=0, max=5)
            ]
        ),
        objectives=[
            Objective(name='y1', minimize=False),
            Objective(name='y2', minimize=False)
        ]
    )

    expected_pareto_df = pd.DataFrame(
        [
            [1, 2],
            [1, 2],
            [2, 1],
            [0.5, 2],
            [1, 1],
            [2, 0.5]
        ],
        columns=['y1', 'y2']
    )
    dominated_df = pd.DataFrame(
        [
            [0.5, 0.5],
            [0.5, 1],
            [0.5, 1.5],
            [1, 0.5],
            [1.5, 0.5]
        ],
        columns=['y1', 'y2']
    )
    all_objectives_df = pd.concat([dominated_df, expected_pareto_df])
    pareto_frontier = ParetoFrontier(
        optimization_problem,
        objectives_df=all_objectives_df,
        parameters_df=pd.DataFrame(index=all_objectives_df.index)
    )
    computed_pareto_df = pareto_frontier.pareto_df
    assert computed_pareto_df.sort_values(by=['y1', 'y2']).equals(expected_pareto_df.sort_values(by=['y1', 'y2']))
class SimpleBayesianOptimizerConfig(metaclass=DefaultConfigMeta):

    CONFIG_SPACE = SimpleHypergrid(
        name="SimpleBayesianOptimizerConfig",
        dimensions=[
            CategoricalDimension(name='utility_function', values=['ucb', 'ei', 'poi']),
            ContinuousDimension(name='kappa', min=-5, max=5),
            ContinuousDimension(name='xi', min=-5, max=5)
        ]
    )

    _DEFAULT = Point(utility_function='ucb', kappa=3, xi=1)

    @classmethod
    def contains(cls, config):
        if not isinstance(config, cls):
            return False
        return Point(utility_function=config.utility_function, kappa=config.kappa, xi=config.xi) in cls.CONFIG_SPACE

    @classmethod
    def create_from_config_point(cls, config_point):
        assert config_point in cls.CONFIG_SPACE
        return cls(utility_function=config_point.utility_function, kappa=config_point.kappa, xi=config_point.xi)

    def __init__(self, utility_function=None, kappa=None, xi=None):
        if utility_function is None:
            utility_function = self._DEFAULT.utility_function
        if kappa is None:
            kappa = self._DEFAULT.kappa
        if xi is None:
            xi = self._DEFAULT.xi
        self.utility_function = utility_function
        self.kappa = kappa
        self.xi = xi

    def to_dict(self):
        return {
            'utility_function': self.utility_function,
            'kappa': self.kappa,
            'xi': self.xi
        }
def test_optimization_problem_none_context(self):
    parameter_space = SimpleHypergrid(
        name="test",
        dimensions=[
            ContinuousDimension(name="x", min=0, max=1),
            OrdinalDimension(name="y", ordered_values=[1, 2, 3, 5, 10]),
            CategoricalDimension(name="y2", values=[True, False])
        ]
    )
    objective_space = SimpleHypergrid(
        name="z",
        dimensions=[
            ContinuousDimension(name="z\n special", min=-50, max=-49),
            ContinuousDimension(name="z1", min=-1, max=1)
        ]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name="z\n special", minimize=True),
            Objective(name="z1", minimize=False)
        ]
    )

    encoded_problem = OptimizerServiceEncoder.encode_optimization_problem(optimization_problem)
    decoded_problem = OptimizerServiceDecoder.decode_optimization_problem(encoded_problem)

    print(f"Context space is: {decoded_problem.context_space}")
    assert decoded_problem.context_space is None

    # Ensure that the round-tripped spaces are still equivalent to the originals.
    #
    # Parameter space
    for _ in range(1000):
        assert decoded_problem.parameter_space.random() in parameter_space
        assert parameter_space.random() in decoded_problem.parameter_space

    # Objective space
    for _ in range(1000):
        assert decoded_problem.objective_space.random() in objective_space
        assert objective_space.random() in decoded_problem.objective_space

    # Feature space
    for _ in range(1000):
        assert decoded_problem.feature_space.random() in optimization_problem.feature_space
        assert optimization_problem.feature_space.random() in decoded_problem.feature_space
class Flower(ObjectiveFunctionBase):
    """Flower function exposing the ObjectiveFunctionBase interface."""

    _domain = SimpleHypergrid(
        name="flower",
        dimensions=[
            ContinuousDimension(name='x1', min=-100, max=100),
            ContinuousDimension(name='x2', min=-100, max=100)
        ]
    )
    _range = SimpleHypergrid(
        name='range',
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )

    def __init__(self, objective_function_config: Point = None):
        assert objective_function_config is None, "This function takes no configuration."
        ObjectiveFunctionBase.__init__(self, objective_function_config)

    @property
    def parameter_space(self) -> Hypergrid:
        return self._domain

    @property
    def output_space(self) -> Hypergrid:
        return self._range

    def evaluate_dataframe(self, dataframe: pd.DataFrame):
        a = 1
        b = 2
        c = 4
        x = dataframe.to_numpy()
        sum_of_squares = np.sum(x ** 2, axis=1)
        x_norm = np.sqrt(sum_of_squares)
        values = a * x_norm + b * np.sin(c * np.arctan2(x[:, 0], x[:, 1]))
        return pd.DataFrame({'y': values})

    def get_context(self) -> Point:
        """Returns a context value for this objective function.

        If the context changes on every invocation, this should return the latest one.
        :return:
        """
        return Point()
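# A quick standalone sanity check (my own illustration, not part of the class above) of the
# formula evaluate_dataframe computes: y = a * ||x|| + b * sin(c * atan2(x1, x2)) with
# a=1, b=2, c=4. At x = (3, 4) the norm is 5 and atan2(3, 4) ~= 0.6435 rad, so
# y ~= 5 + 2 * sin(2.574) ~= 6.075.
import math

a, b, c = 1, 2, 4
x1, x2 = 3.0, 4.0
y = a * math.hypot(x1, x2) + b * math.sin(c * math.atan2(x1, x2))
assert abs(y - 6.075) < 0.01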
def test_construct_feature_dataframe_no_context(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    n_samples = 100
    parameter_df = optimization_problem.parameter_space.random_dataframe(n_samples)
    feature_df = optimization_problem.construct_feature_dataframe(parameters_df=parameter_df)

    assert feature_df.shape == (n_samples, len(optimization_problem.parameter_space.dimension_names) + 1)
    expected_columns = sorted([
        f"three_level_quadratic_config.{n}"
        for n in optimization_problem.parameter_space.dimension_names
    ])
    assert (feature_df.columns[:-1].sort_values() == expected_columns).all()
    assert feature_df.columns[-1] == "contains_context"
    assert not feature_df.contains_context.any()
class ExperimentDesignerConfig(metaclass=DefaultConfigMeta):

    CONFIG_SPACE = SimpleHypergrid(
        name='experiment_designer_config',
        dimensions=[
            CategoricalDimension('utility_function_implementation', values=[ConfidenceBoundUtilityFunction.__name__]),
            CategoricalDimension('numeric_optimizer_implementation', values=[RandomSearchOptimizer.__name__]),
            ContinuousDimension('fraction_random_suggestions', min=0, max=1)
        ]
    ).join(
        subgrid=ConfidenceBoundUtilityFunctionConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension('utility_function_implementation', values=[ConfidenceBoundUtilityFunction.__name__])
    ).join(
        subgrid=RandomSearchOptimizerConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension('numeric_optimizer_implementation', values=[RandomSearchOptimizer.__name__])
    )

    _DEFAULT = Point(
        utility_function_implementation=ConfidenceBoundUtilityFunction.__name__,
        numeric_optimizer_implementation=RandomSearchOptimizer.__name__,
        confidence_bound_utility_function_config=ConfidenceBoundUtilityFunctionConfig.DEFAULT,
        random_search_optimizer_config=RandomSearchOptimizerConfig.DEFAULT,
        fraction_random_suggestions=0.5
    )
def test_hierarchical_quadratic_cold_start(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    num_restarts = 2
    for restart_num in range(num_restarts):
        optimizer_config = bayesian_optimizer_config_store.default
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
        optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter = "best"
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.min_samples_to_fit = 10
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2

        local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config
        )
        remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config
        )

        for bayesian_optimizer in [local_optimizer, remote_optimizer]:
            num_guided_samples = 50
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                print(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

                input_values_df = pd.DataFrame({
                    param_name: [param_value]
                    for param_name, param_value in suggested_params
                })
                target_values_df = y.to_dataframe()
                bayesian_optimizer.register(
                    feature_values_pandas_frame=input_values_df,
                    target_values_pandas_frame=target_values_df
                )

            best_config_point, best_objective = bayesian_optimizer.optimum(optimum_definition=OptimumDefinition.BEST_OBSERVATION)
            print(f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
            self.validate_optima(optimizer=bayesian_optimizer)
def __init__(self, objective_function_config: Point):
    assert objective_function_config.polynomial_objective_config in PolynomialObjective.CONFIG_SPACE
    ObjectiveFunctionBase.__init__(self, objective_function_config)

    # Let's start building the parameter space for it.
    #
    self._parameter_space = SimpleHypergrid(
        name="domain",
        dimensions=[
            CategoricalDimension(
                name="polynomial_id",
                values=list(range(self.objective_function_config.num_nested_polynomials))
            )
        ]
    )

    polynomial_objective_config = self.objective_function_config.polynomial_objective_config
    self._polynomial_objective_config = polynomial_objective_config
    self._polynomials = []

    # Let's create the required number of polynomials.
    #
    for i in range(self.objective_function_config.num_nested_polynomials):
        polynomial_objective_config.seed += i + 1  # Change the seed so that it's still effective but also reproducible.
        polynomial = PolynomialObjectiveWrapper(polynomial_objective_config, domain_name=f"domain_{i}")
        self._polynomials.append(polynomial)
        self._parameter_space.join(
            subgrid=polynomial.parameter_space,
            on_external_dimension=CategoricalDimension(name="polynomial_id", values=[i])
        )

    self._output_space = SimpleHypergrid(
        name='output_space',
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )
class ConfidenceBoundUtilityFunctionConfig(metaclass=DefaultConfigMeta):

    CONFIG_SPACE = SimpleHypergrid(
        name="confidence_bound_utility_function_config",
        dimensions=[
            CategoricalDimension(
                name="utility_function_name",
                values=[
                    "lower_confidence_bound_on_improvement",
                    "upper_confidence_bound_on_improvement"
                ]
            ),
            ContinuousDimension(name="alpha", min=0.01, max=0.5)
        ]
    )

    _DEFAULT = Point(
        utility_function_name="upper_confidence_bound_on_improvement",
        alpha=0.01
    )

    @classmethod
    def create_from_config_point(cls, config_point):
        config_key_value_pairs = {param_name: value for param_name, value in config_point}
        return cls(**config_key_value_pairs)

    def __init__(self, utility_function_name=_DEFAULT.utility_function_name, alpha=_DEFAULT.alpha):
        self.utility_function_name = utility_function_name
        self.alpha = alpha
def setUp(self):
    mlos_globals.init_mlos_global_context()
    mlos_globals.mlos_global_context.start_clock()
    self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
    self.logger.level = logging.DEBUG

    # Start up the gRPC service.
    #
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()
    self.optimizer_service_grpc_channel = grpc.insecure_channel('localhost:50051')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_grpc_channel, logger=self.logger)

    self.mlos_agent = MlosAgent(
        logger=self.logger,
        communication_channel=mlos_globals.mlos_global_context.communication_channel,
        shared_config=mlos_globals.mlos_global_context.shared_config,
        bayesian_optimizer_grpc_channel=self.optimizer_service_grpc_channel
    )
    self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
    self.mlos_agent_thread.start()

    global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

    # Let's add the allowed component types.
    self.mlos_agent.add_allowed_component_type(SmartCache)
    self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)
    self.mlos_agent.set_configuration(
        component_type=SmartCacheWorkloadGenerator,
        new_config_values=Point(
            workload_type='cyclical_key_from_range',
            cyclical_key_from_range_config=Point(min=0, range_width=2048)
        )
    )

    # Let's create the workload.
    self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

    self.optimizer = None
    self.working_set_size_estimator = WorkingSetSizeEstimator()
    self.hit_rate_monitor = HitRateMonitor()

    self.smart_cache_experiment = MlosExperiment(
        smart_component_types=[SmartCache],
        telemetry_aggregators=[self.working_set_size_estimator, self.hit_rate_monitor]
    )
    self.optimization_problem = OptimizationProblem(
        parameter_space=SmartCache.parameter_search_space,
        objective_space=SimpleHypergrid(
            name="objectives",
            dimensions=[ContinuousDimension(name="hit_rate", min=0, max=1)]
        ),
        objectives=[Objective(name="hit_rate", minimize=False)]
    )
def decode_continuous_dimension(serialized: OptimizerService_pb2.ContinuousDimension) -> ContinuousDimension:
    assert isinstance(serialized, OptimizerService_pb2.ContinuousDimension)
    return ContinuousDimension(
        name=serialized.Name,
        min=serialized.Min,
        max=serialized.Max,
        include_min=serialized.IncludeMin,
        include_max=serialized.IncludeMax
    )
def test_construct_feature_dataframe_context(self):
    def f(parameters, context):
        return pd.DataFrame({'function_value': -np.exp(-50 * (parameters.x - 0.5 * context.y - 0.5) ** 2)})

    input_space = SimpleHypergrid(
        name="my_input_name",
        dimensions=[ContinuousDimension(name="x", min=0, max=1)]
    )
    output_space = SimpleHypergrid(
        name="objective",
        dimensions=[ContinuousDimension(name="function_value", min=-10, max=10)]
    )
    context_space = SimpleHypergrid(
        name="my_context_name",
        dimensions=[ContinuousDimension(name="y", min=-1, max=1)]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        # We want to minimize the function.
        objectives=[Objective(name="function_value", minimize=True)],
        context_space=context_space
    )

    n_samples = 100
    parameter_df = input_space.random_dataframe(n_samples)
    context_df = context_space.random_dataframe(n_samples)

    with pytest.raises(ValueError, match="Context required"):
        optimization_problem.construct_feature_dataframe(parameters_df=parameter_df)

    feature_df = optimization_problem.construct_feature_dataframe(
        parameters_df=parameter_df,
        context_df=context_df
    )
    assert isinstance(feature_df, pd.DataFrame)
    assert feature_df.shape == (n_samples, 3)
    assert (feature_df.columns == ['my_input_name.x', 'contains_context', 'my_context_name.y']).all()
    assert feature_df.contains_context.all()
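# A minimal sketch (my own reconstruction, not MLOS's implementation) of the feature
# dataframe layout asserted above: parameter and context columns are prefixed with their
# space names, and a boolean `contains_context` column records whether context was supplied.
import pandas as pd

def build_feature_df(parameters_df: pd.DataFrame, parameter_space_name: str,
                     context_df: pd.DataFrame = None, context_space_name: str = None) -> pd.DataFrame:
    features = parameters_df.add_prefix(f"{parameter_space_name}.")
    features['contains_context'] = context_df is not None
    if context_df is not None:
        features = features.join(context_df.add_prefix(f"{context_space_name}."))
    return features

demo = build_feature_df(
    pd.DataFrame({'x': [0.1, 0.9]}), 'my_input_name',
    pd.DataFrame({'y': [-0.5, 0.5]}), 'my_context_name'
)
assert list(demo.columns) == ['my_input_name.x', 'contains_context', 'my_context_name.y']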
class HomogeneousRandomForestRegressionModelConfig(RegressionModelConfig):

    CONFIG_SPACE = SimpleHypergrid(
        name="homogeneous_random_forest_regression_model_config",
        dimensions=[
            DiscreteDimension(name="n_estimators", min=1, max=100),
            ContinuousDimension(name="features_fraction_per_estimator", min=0, max=1, include_min=False, include_max=True),
            ContinuousDimension(name="samples_fraction_per_estimator", min=0, max=1, include_min=False, include_max=True),
            CategoricalDimension(name="regressor_implementation", values=[DecisionTreeRegressionModel.__name__]),
        ]
    ).join(
        subgrid=DecisionTreeRegressionModelConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(name="regressor_implementation", values=[DecisionTreeRegressionModel.__name__])
    )

    _DEFAULT = Point(
        n_estimators=5,
        features_fraction_per_estimator=1,
        samples_fraction_per_estimator=0.7,
        regressor_implementation=DecisionTreeRegressionModel.__name__,
        decision_tree_regression_model_config=DecisionTreeRegressionModelConfig.DEFAULT
    )

    def __init__(
            self,
            n_estimators=_DEFAULT.n_estimators,
            features_fraction_per_estimator=_DEFAULT.features_fraction_per_estimator,
            samples_fraction_per_estimator=_DEFAULT.samples_fraction_per_estimator,
            regressor_implementation=_DEFAULT.regressor_implementation,
            decision_tree_regression_model_config: Point = _DEFAULT.decision_tree_regression_model_config
    ):
        self.n_estimators = n_estimators
        self.features_fraction_per_estimator = features_fraction_per_estimator
        self.samples_fraction_per_estimator = samples_fraction_per_estimator
        self.regressor_implementation = regressor_implementation

        assert regressor_implementation == DecisionTreeRegressionModel.__name__
        self.decision_tree_regression_model_config = DecisionTreeRegressionModelConfig.create_from_config_point(decision_tree_regression_model_config)

    @classmethod
    def contains(cls, config):  # pylint: disable=unused-argument
        return True  # TODO: see if you can remove this class entirely.
def _build_simple_hypergrid_target(self) -> None:
    self._target = SimpleHypergrid(
        name=self._adaptee.name,
        dimensions=None,
        random_state=self._adaptee.random_state
    )

    # Add the non-transformed adaptee dimensions to the target.
    for adaptee_dimension in self._adaptee.dimensions:
        if adaptee_dimension.name not in self._adaptee_dimension_names_to_transform:
            self._target.add_dimension(adaptee_dimension.copy())

    if not self._adaptee_contains_dimensions_to_transform:
        return

    # Add the new dimensions to be created by sklearn PolynomialFeatures.
    # Construct target dimension names using adaptee dimension names and the polynomial feature powers matrix.
    # This logic is worked out explicitly here so we have control over the derived dimension names.
    # Currently, the code only substitutes adaptee feature names into the default feature names produced by
    # sklearn PolynomialFeatures' .get_feature_names() method.
    poly_feature_dim_names = self._get_polynomial_feature_names()
    for i, poly_feature_name in enumerate(poly_feature_dim_names):
        ith_terms_powers = self._polynomial_features_powers[i]

        if not self._polynomial_features_kwargs['include_bias'] and ith_terms_powers.sum() == 0:
            # Skip the constant term.
            continue

        # Replace the {x0_, x1_, ...} stand-ins in the polynomial feature name with the adaptee dimension names.
        target_dim_name = poly_feature_name
        for j, adaptee_dim_name in enumerate(self._adaptee_dimension_names_to_transform):
            adaptee_dim_power = ith_terms_powers[j]
            if adaptee_dim_power == 0:
                continue
            if adaptee_dim_power == 1:
                poly_feature_adaptee_dim_name_standin = f'x{j}{self._internal_feature_name_terminal_char}'
                adaptee_dim_replacement_name = adaptee_dim_name
            else:  # power > 1 cases
                poly_feature_adaptee_dim_name_standin = f'x{j}{self._internal_feature_name_terminal_char}^{adaptee_dim_power}'
                adaptee_dim_replacement_name = f'{adaptee_dim_name}^{adaptee_dim_power}'
            target_dim_name = target_dim_name.replace(poly_feature_adaptee_dim_name_standin, adaptee_dim_replacement_name)

        # Add the target dimension. Min and max are placed at -inf and +inf since .random() on the
        # target hypergrid is generated on the original hypergrid and passed through the adapters.
        self._target.add_dimension(ContinuousDimension(name=target_dim_name, min=-math.inf, max=math.inf))
        self._target_polynomial_feature_map[target_dim_name] = i
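# A short standalone illustration (plain sklearn/numpy; assumes a version exposing
# `powers_`) of the name substitution above: PolynomialFeatures names its output terms with
# stand-ins like 'x0 x1' or 'x0^2', and the powers matrix tells us which input dimension
# appears in each output term and with what exponent.
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, include_bias=False)
poly.fit(np.zeros((1, 2)))  # two input dimensions, e.g. adaptee dims 'a' and 'b'
# Rows of powers_ correspond to output terms:
# [1 0] -> a, [0 1] -> b, [2 0] -> a^2, [1 1] -> a*b, [0 2] -> b^2.
print(poly.powers_)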
def test_continuous_dimension(self, include_min, include_max):
    continuous_dimension = ContinuousDimension(
        name='continuous',
        min=0,
        max=10,
        include_min=include_min,
        include_max=include_max
    )
    serialized = OptimizerServiceEncoder.encode_continuous_dimension(continuous_dimension)
    deserialized_continuous_dimension = OptimizerServiceDecoder.decode_continuous_dimension(serialized)
    assert isinstance(serialized, OptimizerService_pb2.ContinuousDimension)
    assert deserialized_continuous_dimension == continuous_dimension
def setUp(self):
    mlos_globals.init_mlos_global_context()
    mlos_globals.mlos_global_context.start_clock()
    self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
    self.logger.level = logging.INFO

    self.mlos_agent = MlosAgent(
        logger=self.logger,
        communication_channel=mlos_globals.mlos_global_context.communication_channel,
        shared_config=mlos_globals.mlos_global_context.shared_config,
    )
    self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
    self.mlos_agent_thread.start()

    global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

    self.workload_duration_s = 5

    # Let's add the allowed component types.
    self.mlos_agent.add_allowed_component_type(SmartCache)
    self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)

    # Let's create the workload.
    self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

    self.optimizer = None
    self.working_set_size_estimator = WorkingSetSizeEstimator()
    self.cache_config_timer = Timer(
        timeout_ms=200,
        observer_callback=self._set_new_cache_configuration
    )

    self.smart_cache_experiment = MlosExperiment(
        smart_component_types=[SmartCache],
        telemetry_aggregators=[self.cache_config_timer, self.working_set_size_estimator]
    )
    self.optimization_problem = OptimizationProblem(
        parameter_space=SmartCache.parameter_search_space,
        objective_space=SimpleHypergrid(
            name="objectives",
            dimensions=[ContinuousDimension(name="miss_rate", min=0, max=1)]
        ),
        context_space=None,  # TODO: add working set size estimate
        objectives=[Objective(name="miss_rate", minimize=True)]
    )
def __init__(self, objective_function_config: Point):
    assert objective_function_config in PolynomialObjective.CONFIG_SPACE
    ObjectiveFunctionBase.__init__(self, objective_function_config)

    self._polynomial_objective_config = objective_function_config
    self._polynomial_function = PolynomialObjective(
        seed=objective_function_config.seed,
        input_domain_dimension=objective_function_config.input_domain_dimension,
        max_degree=objective_function_config.max_degree,
        include_mixed_coefficients=objective_function_config.include_mixed_coefficients,
        percent_coefficients_zeroed=objective_function_config.percent_coefficients_zeroed,
        coefficient_domain_min=objective_function_config.coefficient_domain_min,
        coefficient_domain_width=objective_function_config.coefficient_domain_width,
        include_noise=objective_function_config.include_noise,
        noise_coefficient_of_variation=objective_function_config.noise_coefficient_of_variation,
    )

    self._parameter_space = SimpleHypergrid(
        name="domain",
        dimensions=[
            ContinuousDimension(
                name=f"x_{i}",
                min=objective_function_config.coefficient_domain_min,
                max=objective_function_config.coefficient_domain_min + objective_function_config.coefficient_domain_width
            )
            for i in range(objective_function_config.input_domain_dimension)
        ]
    )
    self._output_space = SimpleHypergrid(
        name='output_space',
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )
def test_composite_dimension(self):
    original_A = ContinuousDimension(name='x', min=0, max=1)
    original_B = ContinuousDimension(name='x', min=2, max=3)
    original_C = ContinuousDimension(name='x', min=2.5, max=3.5)
    original_D = original_A.union(original_B) - original_C
    original_E = original_B - original_C
    original_F = original_A.union(original_E)

    serialized_A = json.dumps(original_A, cls=HypergridJsonEncoder, indent=2)
    serialized_B = json.dumps(original_B, cls=HypergridJsonEncoder, indent=2)
    serialized_C = json.dumps(original_C, cls=HypergridJsonEncoder, indent=2)
    serialized_D = json.dumps(original_D, cls=HypergridJsonEncoder, indent=2)
    serialized_E = json.dumps(original_E, cls=HypergridJsonEncoder, indent=2)
    serialized_F = json.dumps(original_F, cls=HypergridJsonEncoder, indent=2)

    A = json.loads(serialized_A, cls=HypergridJsonDecoder)
    B = json.loads(serialized_B, cls=HypergridJsonDecoder)
    C = json.loads(serialized_C, cls=HypergridJsonDecoder)
    D = json.loads(serialized_D, cls=HypergridJsonDecoder)
    E = json.loads(serialized_E, cls=HypergridJsonDecoder)
    F = json.loads(serialized_F, cls=HypergridJsonDecoder)

    self.assertTrue(A in original_A)
    self.assertTrue(B in original_B)
    self.assertTrue(C in original_C)
    self.assertTrue(D in original_D)
    self.assertTrue(E in original_E)
    self.assertTrue(F in original_F)

    self.assertTrue(original_A in A)
    self.assertTrue(original_B in B)
    self.assertTrue(original_C in C)
    self.assertTrue(original_D in D)
    self.assertTrue(original_E in E)
    self.assertTrue(original_F in F)

    self.assertTrue(0.5 in D)
    self.assertTrue(1.5 not in D)
    self.assertTrue(2.5 not in D)
    self.assertTrue(3.4 not in D)
    self.assertTrue(35 not in D)
    self.assertTrue(2 in E)
    self.assertTrue(2.5 not in E)
    self.assertTrue(0 in F and 1 in F and 1.5 not in F and 2 in F and 2.5 not in F)
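# Working out the composite sets above (my reading of the interval arithmetic, assuming
# union and difference behave as ordinary set operations on closed [min, max] intervals):
#   D = ([0, 1] U [2, 3]) - [2.5, 3.5] = [0, 1] U [2, 2.5), hence 0.5 is in D while
#       1.5, 2.5, 3.4 and 35 are not.
#   E = [2, 3] - [2.5, 3.5] = [2, 2.5), hence 2 is in E and 2.5 is not.
#   F = [0, 1] U E = [0, 1] U [2, 2.5), matching the final membership assertion.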
def setup_method(self, method):
    # Let's create a simple linear mapping.
    self.gradient = 10
    self.y_intercept = 10
    self.input_values = np.linspace(start=0, stop=100, num=101, endpoint=True)
    self.output_values = self.input_values * self.gradient + self.y_intercept

    self.input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name="x", min=0, max=100)]
    )
    self.output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name="y", min=-math.inf, max=math.inf)]
    )

    self.input_pandas_dataframe = pd.DataFrame({"x": self.input_values})
    self.output_pandas_dataframe = pd.DataFrame({"y": self.output_values})