def setUp(self):
    # Let's create a simple quadratic response function.
    #
    self.input_space = SimpleHypergrid(
        name="2d_X_search_domain",
        dimensions=[
            ContinuousDimension(name="x1", min=0.0, max=5.0),
            ContinuousDimension(name="x2", min=0.0, max=5.0)
        ]
    )
    self.output_space = SimpleHypergrid(
        name="degree2_polynomial",
        dimensions=[
            ContinuousDimension(name="degree2_polynomial_y", min=-10**15, max=10**15)
        ]
    )

    lasso_model_config = SklearnLassoRegressionModelConfig.DEFAULT
    rf_model_config = SklearnRandomForestRegressionModelConfig.DEFAULT
    self.model_config = RegressionEnhancedRandomForestRegressionModelConfig(
        max_basis_function_degree=2,
        min_abs_root_model_coef=0.02,
        boosting_root_model_name=SklearnLassoRegressionModelConfig.__name__,
        boosting_root_model_config=lasso_model_config,
        random_forest_model_config=rf_model_config,
        perform_initial_root_model_hyper_parameter_search=True,
        perform_initial_random_forest_hyper_parameter_search=True
    )
def _build_simple_hypergrid_target(self) -> None:
    """ Builds a SimpleHypergrid target for a SimpleHypergrid adaptee.

    :return:
    """
    self._target = SimpleHypergrid(
        name=self._adaptee.name,
        dimensions=None,
        random_state=self._adaptee.random_state
    )

    # Now we iterate over all adaptee dimensions and map each one to a ContinuousDimension on the
    # unit interval: DiscreteDimensions map to the half-open interval [0, 1), while the remaining
    # dimensions keep their original endpoint inclusion flags.
    #
    for adaptee_dimension in self._adaptee.dimensions:
        if isinstance(adaptee_dimension, DiscreteDimension):
            target_dimension = ContinuousDimension(name=adaptee_dimension.name, min=0, max=1, include_max=False)
        else:
            target_dimension = ContinuousDimension(
                name=adaptee_dimension.name,
                min=0,
                max=1,
                include_min=adaptee_dimension.include_min,
                include_max=adaptee_dimension.include_max
            )

        self._target.add_dimension(target_dimension)
        self._adaptee_to_target_dimension_mappings[adaptee_dimension.name] = target_dimension
        self._target_to_adaptee_dimension_mappings[target_dimension.name] = adaptee_dimension
def setup_method(self, method):
    self.model_config = lasso_cross_validated_config_store.default
    self.max_basis_function_degree = 2

    self.test_case_globals = {
        '2d_X_deg2_poly_input_space': SimpleHypergrid(
            name="2d_X_deg2_poly_search_domain",
            dimensions=[
                ContinuousDimension(name="1", min=0.0, max=5.0),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                ContinuousDimension(name="x1**2", min=0.0, max=25.0),
                ContinuousDimension(name="x1*x2", min=0.0, max=25.0),
                ContinuousDimension(name="x2**2", min=0.0, max=25.0)
            ]
        ),
        'categorical_deg2_poly_input_space': SimpleHypergrid(
            name="categorical_search_domain",
            dimensions=[
                CategoricalDimension(name='x0', values=['a', 'b', 'c']),
                ContinuousDimension(name="1", min=0.0, max=5.0),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                ContinuousDimension(name="x1**2", min=0.0, max=25.0),
                ContinuousDimension(name="x1*x2", min=0.0, max=25.0),
                ContinuousDimension(name="x2**2", min=0.0, max=25.0),
                CategoricalDimension(name='i0', values=['-5', '5'])
            ]
        ),
        'degree2_output_space': SimpleHypergrid(
            name="degree2_polynomial",
            dimensions=[
                ContinuousDimension(name="degree2_polynomial_y", min=-10**15, max=10**15)
            ]
        )
    }
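# For orientation: the dimension names in the '2d_X_deg2_poly' grid above are exactly the
# degree-2 monomial basis of (x1, x2). An illustrative computation of that basis for one point:
x1, x2 = 2.0, 3.0
degree2_basis = np.array([1.0, x1, x2, x1 ** 2, x1 * x2, x2 ** 2])
# Matches the dimension names: '1', 'x1', 'x2', 'x1**2', 'x1*x2', 'x2**2'.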
def test_optimum_before_register_error(self):
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name='x', min=-10, max=10)]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name='y', min=-math.inf, max=math.inf)]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
    bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default
    )

    with pytest.raises(ValueError):
        bayesian_optimizer.optimum()

    bayesian_optimizer.register(
        parameter_values_pandas_frame=pd.DataFrame({'x': [0.0]}),
        target_values_pandas_frame=pd.DataFrame({'y': [1.0]})
    )
    bayesian_optimizer.optimum()
def setUpClass(cls) -> None:
    global_values.declare_singletons()

    cls.slope = 10
    cls.y_intercept = 10
    cls.input_values = np.linspace(start=0, stop=100, num=1000, endpoint=True)
    cls.output_values = cls.input_values * cls.slope + cls.y_intercept

    cls.input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name="x", min=0, max=100)]
    )
    cls.output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name="y", min=-math.inf, max=math.inf)]
    )

    cls.input_pandas_dataframe = pd.DataFrame({"x": cls.input_values})
    cls.output_pandas_dataframe = pd.DataFrame({"y": cls.output_values})

    cls.model_config = HomogeneousRandomForestRegressionModelConfig()
    cls.model = HomogeneousRandomForestRegressionModel(
        model_config=cls.model_config,
        input_space=cls.input_space,
        output_space=cls.output_space
    )
    cls.model.fit(
        cls.input_pandas_dataframe,
        cls.output_pandas_dataframe,
        iteration_number=len(cls.input_pandas_dataframe.index)
    )

    cls.sample_inputs = {'x': np.linspace(start=-10, stop=110, num=13, endpoint=True)}
    cls.sample_inputs_pandas_dataframe = pd.DataFrame(cls.sample_inputs)
    cls.sample_predictions = cls.model.predict(cls.sample_inputs_pandas_dataframe)
def setUp(self):
    # Let's create a simple linear mapping.
    self.slope = 10
    self.y_intercept = 10
    self.input_values = np.linspace(start=0, stop=100, num=1001, endpoint=True)
    self.input_output_mapping = lambda input: input * self.slope + self.y_intercept
    self.output_values = self.input_output_mapping(self.input_values)

    self.input_space = SimpleHypergrid(
        name="input",
        dimensions=[ContinuousDimension(name="x", min=0, max=100)]
    )
    self.output_space = SimpleHypergrid(
        name="output",
        dimensions=[ContinuousDimension(name="y", min=-math.inf, max=math.inf)]
    )

    self.input_pandas_dataframe = pd.DataFrame({"x": self.input_values})
    self.output_pandas_dataframe = pd.DataFrame({"y": self.output_values})
def setUp(self):
    self.emergency_buffer_settings = SimpleHypergrid(
        name='emergency_buffer_config',
        dimensions=[
            DiscreteDimension(name='log2_emergency_buffer_size', min=0, max=16),
            CategoricalDimension(name='use_colors', values=[True, False])
        ]
    )

    self.emergency_buffer_color = SimpleHypergrid(
        name='emergency_buffer_color',
        dimensions=[
            CategoricalDimension(name='color', values=['Maroon', 'Crimson', 'Tanager'])
        ]
    )

    self.emergency_buffer_settings_with_color = self.emergency_buffer_settings.join(
        subgrid=self.emergency_buffer_color,
        on_external_dimension=CategoricalDimension(name='use_colors', values=[True])
    )

    self.hierarchical_settings = SimpleHypergrid(
        name='communication_channel_config',
        dimensions=[
            DiscreteDimension(name='num_readers', min=1, max=64),
            DiscreteDimension(name='log2_buffer_size', min=10, max=24),
            CategoricalDimension(name='use_emergency_buffer', values=[True, False])
        ]
    ).join(
        subgrid=self.emergency_buffer_settings_with_color,
        on_external_dimension=CategoricalDimension(name='use_emergency_buffer', values=[True])
    )
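# A minimal standalone sketch of how a joined hierarchical grid like the one above behaves;
# SimpleHypergrid.random() and the membership operator are both exercised elsewhere in this suite.
colors = SimpleHypergrid(
    name='colors',
    dimensions=[CategoricalDimension(name='color', values=['Maroon', 'Crimson'])]
)
settings = SimpleHypergrid(
    name='settings',
    dimensions=[CategoricalDimension(name='use_colors', values=[True, False])]
).join(
    subgrid=colors,
    on_external_dimension=CategoricalDimension(name='use_colors', values=[True])
)

# Every random sample belongs to the grid, whether or not the subgrid was activated.
for _ in range(10):
    assert settings.random() in settings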
def __init__(self, objective_function_config: Point):
    assert objective_function_config.polynomial_objective_config in PolynomialObjective.CONFIG_SPACE
    ObjectiveFunctionBase.__init__(self, objective_function_config)

    # Let's start building the parameter space for it.
    #
    self._parameter_space = SimpleHypergrid(
        name="domain",
        dimensions=[
            CategoricalDimension(
                name="polynomial_id",
                values=[id for id in range(self.objective_function_config.num_nested_polynomials)]
            )
        ]
    )

    polynomial_objective_config = self.objective_function_config.polynomial_objective_config
    self._polynomial_objective_config = polynomial_objective_config
    self._polynomials = []

    # Let's create the required number of polynomials.
    #
    for i in range(self.objective_function_config.num_nested_polynomials):
        polynomial_objective_config.seed += i + 1  # Change the seed so that it's still effective but also reproducible.
        polynomial = PolynomialObjectiveWrapper(polynomial_objective_config, domain_name=f"domain_{i}")
        self._polynomials.append(polynomial)
        self._parameter_space.join(
            subgrid=polynomial.parameter_space,
            on_external_dimension=CategoricalDimension(name="polynomial_id", values=[i])
        )

    self._output_space = SimpleHypergrid(
        name='output_space',
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )
def setUp(self):
    self.logger = create_logger(self.__class__.__name__)

    # Start up the gRPC service.
    #
    self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
    self.server.start()

    self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
    self.bayesian_optimizer_factory = BayesianOptimizerFactory(
        grpc_channel=self.optimizer_service_channel,
        logger=self.logger
    )
    self.optimizer_monitor = OptimizerMonitor(
        grpc_channel=self.optimizer_service_channel,
        logger=self.logger
    )

    # Define the optimization problem.
    #
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100)
        ]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )
    self.optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )
def __init__(self, objective_function_config: Point = None):
    assert objective_function_config in enveloped_waves_config_space, f"{objective_function_config} not in {enveloped_waves_config_space}"
    ObjectiveFunctionBase.__init__(self, objective_function_config)

    self._parameter_space = SimpleHypergrid(
        name="domain",
        dimensions=[
            ContinuousDimension(name=f"x_{i}", min=0, max=objective_function_config.num_periods * objective_function_config.period)
            for i in range(self.objective_function_config.num_params)
        ]
    )
    self._output_space = SimpleHypergrid(
        name="range",
        dimensions=[
            ContinuousDimension(name="y", min=-math.inf, max=math.inf)
        ]
    )

    if self.objective_function_config.envelope_type == "linear":
        self._envelope = self._linear_envelope
    elif self.objective_function_config.envelope_type == "quadratic":
        self._envelope = self._quadratic_envelope
    elif self.objective_function_config.envelope_type == "sine":
        self._envelope = self._sine_envelope
    else:
        # No envelope: a constant function equal to 1 everywhere, preserving the input's shape.
        self._envelope = lambda x: x * 0 + 1
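# A usage sketch following the ObjectiveFunctionFactory pattern used by the tests in this suite.
# The config name 'enveloped_waves' is an assumption for illustration; substitute a real named config.
objective_function = ObjectiveFunctionFactory.create_objective_function(
    objective_function_config=objective_function_config_store.get_config_by_name('enveloped_waves')  # hypothetical name
)
random_params = objective_function.parameter_space.random()
y = objective_function.evaluate_point(random_params)
assert y in objective_function.output_space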
def setUpClass(cls) -> None:
    cls.simple_hypergrid = SimpleHypergrid(
        name='simple_adaptee',
        dimensions=[
            CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 1]),
            DiscreteDimension(name='one_to_ten', min=1, max=10),
            ContinuousDimension(name='zero_to_one', min=0, max=1),
            OrdinalDimension(name='ordinal_mixed_types', ordered_values=[1, False, 'two'])
        ]
    )

    cls.hierarchical_hypergrid = SimpleHypergrid(
        name='hierarchical_adaptee',
        dimensions=[
            CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 3]),
            DiscreteDimension(name='one_to_ten', min=1, max=10),
            ContinuousDimension(name='zero_to_one', min=0, max=1),
            OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, False, 'two'])
        ]
    ).join(
        subgrid=SimpleHypergrid(
            name="nested_grid",
            dimensions=[
                CategoricalDimension(name='categorical_mixed_types', values=['red', False, True, 3]),
                DiscreteDimension(name='one_to_ten', min=1, max=10),
                ContinuousDimension(name='zero_to_one', min=0, max=1),
                OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, 'two', False])
            ]
        ),
        on_external_dimension=CategoricalDimension(name='categorical_mixed_types', values=[True])
    )
def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(self):
    """ Tests the bayesian optimizer on a simple quadratic function with no prior data.

    :return:
    """
    input_space = SimpleHypergrid(
        name="input",
        dimensions=[
            ContinuousDimension(name='x_1', min=-100, max=100),
            ContinuousDimension(name='x_2', min=-100, max=100)
        ]
    )
    output_space = SimpleHypergrid(
        name="output",
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=input_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    bayesian_optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=BayesianOptimizerConfig.DEFAULT,
        logger=self.logger
    )

    num_guided_samples = 1000
    for i in range(num_guided_samples):
        suggested_params = bayesian_optimizer.suggest()
        suggested_params_dict = suggested_params.to_dict()

        target_value = quadratic(**suggested_params_dict)
        self.logger.info(f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}")

        input_values_df = pd.DataFrame({
            param_name: [param_value]
            for param_name, param_value in suggested_params_dict.items()
        })
        target_values_df = pd.DataFrame({'y': [target_value]})

        bayesian_optimizer.register(input_values_df, target_values_df)
        if i > 20 and i % 20 == 0:
            self.logger.info(f"[{i}/{num_guided_samples}] Optimum: {bayesian_optimizer.optimum()}")

    self.logger.info(f"Optimum: {bayesian_optimizer.optimum()}")
def _build_simple_hypergrid_target(self) -> None:
    self._target = SimpleHypergrid(
        name=self._adaptee.name,
        dimensions=None,
        random_state=self._adaptee.random_state
    )

    # Add non-transformed adaptee dimensions to the target.
    for adaptee_dimension in self._adaptee.dimensions:
        if adaptee_dimension.name not in self._adaptee_dimension_names_to_transform:
            self._target.add_dimension(adaptee_dimension.copy())

    if not self._adaptee_contains_dimensions_to_transform:
        return

    # Add the new dimensions to be created by sklearn PolynomialFeatures.
    # Construct target dim names using adaptee dim names and the polynomial feature powers matrix.
    # This logic is worked out explicitly here so we have control over the derived dimension names.
    # Currently, the code only substitutes adaptee feature names into the default feature names produced by
    # sklearn PolynomialFeatures' .get_feature_names() method.
    poly_feature_dim_names = self._get_polynomial_feature_names()
    for i, poly_feature_name in enumerate(poly_feature_dim_names):
        ith_terms_powers = self._polynomial_features_powers[i]

        if not self._polynomial_features_kwargs['include_bias'] and ith_terms_powers.sum() == 0:
            # The constant term is skipped.
            continue

        # Replace the sklearn stand-in names {x0_, x1_, ...} in the poly feature name with the adaptee dim names.
        target_dim_name = poly_feature_name
        for j, adaptee_dim_name in enumerate(self._adaptee_dimension_names_to_transform):
            adaptee_dim_power = ith_terms_powers[j]
            if adaptee_dim_power == 0:
                continue
            if adaptee_dim_power == 1:
                poly_feature_adaptee_dim_name_standin = f'x{j}{self._internal_feature_name_terminal_char}'
                adaptee_dim_replacement_name = adaptee_dim_name
            else:  # power > 1 cases
                poly_feature_adaptee_dim_name_standin = f'x{j}{self._internal_feature_name_terminal_char}^{adaptee_dim_power}'
                adaptee_dim_replacement_name = f'{adaptee_dim_name}^{adaptee_dim_power}'
            target_dim_name = target_dim_name.replace(poly_feature_adaptee_dim_name_standin, adaptee_dim_replacement_name)

        # Add the target dimension.
        # Min and max are placed at -Inf and +Inf since .random() on the target hypergrid is generated on the original
        # hypergrid and passed through the adapters.
        self._target.add_dimension(ContinuousDimension(name=target_dim_name, min=-math.inf, max=math.inf))
        self._target_polynomial_feature_map[target_dim_name] = i
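# For orientation, a standalone sketch (not part of the adapter) of the sklearn feature names
# this method rewrites. With two inputs and degree 2, PolynomialFeatures names its outputs
# '1', 'x0', 'x1', 'x0^2', 'x0 x1', 'x1^2', and powers_ records each term's per-input exponents.
# Note: .get_feature_names() is the method referenced above; newer sklearn releases replace it
# with .get_feature_names_out().
from sklearn.preprocessing import PolynomialFeatures
import numpy as np

poly_features = PolynomialFeatures(degree=2, include_bias=True)
poly_features.fit(np.zeros((1, 2)))  # Two input features; the data values don't matter here.
print(poly_features.get_feature_names())  # ['1', 'x0', 'x1', 'x0^2', 'x0 x1', 'x1^2']
print(poly_features.powers_)  # Row [1, 1] corresponds to the 'x0 x1' cross term.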
def test_randomly_generating_team_member(self):
    self.logger.info("Starting first check in test.")
    mlos_team = SimpleHypergrid(
        name="mlos_team",
        dimensions=[
            CategoricalDimension(name="member", values=["Ed", "Greg", "Sergiy", "Yaser", "Adam", "Zack"])
        ]
    )
    random_member = mlos_team.random()
    assert random_member in mlos_team
def setUp(self):
    self.model_config = RegressionEnhancedRandomForestRegressionModelConfig.DEFAULT

    self.test_case_globals = {
        '2d_X_input_space': SimpleHypergrid(
            name="2d_X_search_domain",
            dimensions=[
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0)
            ]
        ),
        'categorical_input_space': SimpleHypergrid(
            name="categorical_search_domain",
            dimensions=[
                CategoricalDimension(name='x0', values=['a', 'b', 'c']),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                CategoricalDimension(name='i0', values=['-5', '5'])
            ]
        ),
        'categorical_hierarchical_input_space': SimpleHypergrid(
            name="categorical_search_domain",
            dimensions=[
                CategoricalDimension(name='x0', values=['a', 'b', 'c']),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                CategoricalDimension(name='i0', values=['-5', '5'])
            ]
        ),
        'output_space': SimpleHypergrid(
            name="degree2_polynomial",
            dimensions=[
                ContinuousDimension(name="degree2_polynomial_y", min=-10**15, max=10**15)
            ]
        )
    }
class Flower(ObjectiveFunctionBase):
    """ Flower function exposing the ObjectiveFunctionBase interface.
    """
    _domain = SimpleHypergrid(
        name="flower",
        dimensions=[
            ContinuousDimension(name='x1', min=-100, max=100),
            ContinuousDimension(name='x2', min=-100, max=100)
        ]
    )
    _range = SimpleHypergrid(
        name='range',
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )

    def __init__(self, objective_function_config: Point = None):
        assert objective_function_config is None, "This function takes no configuration."
        ObjectiveFunctionBase.__init__(self, objective_function_config)

    @property
    def parameter_space(self) -> Hypergrid:
        return self._domain

    @property
    def output_space(self) -> Hypergrid:
        return self._range

    def evaluate_dataframe(self, dataframe: pd.DataFrame):
        a = 1
        b = 2
        c = 4
        x = dataframe.to_numpy()
        sum_of_squares = np.sum(x ** 2, axis=1)
        x_norm = np.sqrt(sum_of_squares)
        values = a * x_norm + b * np.sin(c * np.arctan2(x[:, 0], x[:, 1]))
        return pd.DataFrame({'y': values})

    def get_context(self) -> Point:
        """ Returns a context value for this objective function.

        If the context changes on every invocation, this should return the latest one.
        :return:
        """
        return Point()
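# A brief usage sketch for the Flower class above. This assumes random_dataframe() returns
# columns in dimension order (x1, x2), which evaluate_dataframe() relies on via to_numpy().
flower = Flower()
random_params_df = flower.parameter_space.random_dataframe(10)
values_df = flower.evaluate_dataframe(random_params_df)
assert len(values_df.index) == 10
assert 'y' in values_df.columns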
def test_basic_functionality_on_2d_objective_space(self):
    """ Basic sanity check. Mainly used to help us develop the API.
    """
    # Let's just create a bunch of random points, build a pareto frontier
    # and verify that the invariants hold.
    #
    parameter_space = SimpleHypergrid(
        name='params',
        dimensions=[ContinuousDimension(name='x1', min=0, max=10)]
    )
    objective_space = SimpleHypergrid(
        name='objectives',
        dimensions=[
            ContinuousDimension(name='y1', min=0, max=10),
            ContinuousDimension(name='y2', min=0, max=10)
        ]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name='y1', minimize=False),
            Objective(name='y2', minimize=False)
        ]
    )

    num_rows = 100000
    random_objectives_df = objective_space.random_dataframe(num_rows)

    pareto_frontier = ParetoFrontier(
        optimization_problem=optimization_problem,
        objectives_df=random_objectives_df
    )
    pareto_df = pareto_frontier.pareto_df

    non_pareto_index = random_objectives_df.index.difference(pareto_df.index)
    for i, row in pareto_df.iterrows():
        # Let's make sure that no point on the pareto frontier is dominated by any non-pareto point:
        # every non-pareto point must be strictly worse in at least one objective.
        #
        assert (random_objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

        # Let's also make sure that no point on the pareto frontier is dominated by any other point there.
        #
        other_rows = pareto_df.index.difference([i])
        assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
def decode_simple_hypergrid(hypergrid: OptimizerService_pb2.SimpleHypergrid) -> SimpleHypergrid:
    assert isinstance(hypergrid, OptimizerService_pb2.SimpleHypergrid)
    decoded_hypergrid = SimpleHypergrid(
        name=hypergrid.Name,
        dimensions=[OptimizerServiceDecoder.decode_dimension(dimension) for dimension in hypergrid.Dimensions]
    )

    for subgrid in hypergrid.GuestSubgrids:
        decoded_subgrid = OptimizerServiceDecoder.decode_subgrid(subgrid)
        decoded_hypergrid.join(
            subgrid=decoded_subgrid.subgrid,
            on_external_dimension=decoded_subgrid.join_dimension
        )

    return decoded_hypergrid
def test_name_flattening(self):
    num_tests = 1000
    for i in range(num_tests):
        random_config = self.cache_param_space.random()
        flat_dimensions = []
        for dimension_name, value in random_config:
            original_dimension = self.cache_param_space[dimension_name]
            flat_dimension = original_dimension.copy()
            flat_dimension.name = Dimension.flatten_dimension_name(dimension_name)
            flat_dimensions.append(flat_dimension)

        # Let's create a flat hypergrid that contains that random_config.
        flat_cache_param_space = SimpleHypergrid(
            name=f"Flat{self.cache_param_space.name}",
            dimensions=flat_dimensions
        )

        flat_random_config = random_config.flat_copy()
        self.assertTrue(flat_random_config in flat_cache_param_space)

        # Let's try another random config. Its flattened copy need not belong to this flat
        # hypergrid (the hierarchical dimensions can differ), but the membership test must not raise.
        another_random_config = self.cache_param_space.random()
        flattened_config = another_random_config.flat_copy()
        try:
            _ = flattened_config in flat_cache_param_space
        except Exception:
            self.fail("Membership test on the flat hypergrid raised an exception.")
def test_construct_feature_dataframe_no_context(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    output_space = SimpleHypergrid(
        name="output",
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    n_samples = 100
    parameter_df = optimization_problem.parameter_space.random_dataframe(n_samples)
    feature_df = optimization_problem.construct_feature_dataframe(parameters_df=parameter_df)

    assert feature_df.shape == (n_samples, len(optimization_problem.parameter_space.dimension_names) + 1)

    expected_columns = sorted([
        f"three_level_quadratic_config.{n}"
        for n in optimization_problem.parameter_space.dimension_names
    ])
    assert (feature_df.columns[:-1].sort_values() == expected_columns).all()
    assert feature_df.columns[-1] == "contains_context"
    assert not feature_df.contains_context.any()
class ExperimentDesignerConfig(metaclass=DefaultConfigMeta):

    CONFIG_SPACE = SimpleHypergrid(
        name='experiment_designer_config',
        dimensions=[
            CategoricalDimension('utility_function_implementation', values=[ConfidenceBoundUtilityFunction.__name__]),
            CategoricalDimension('numeric_optimizer_implementation', values=[RandomSearchOptimizer.__name__]),
            ContinuousDimension('fraction_random_suggestions', min=0, max=1)
        ]
    ).join(
        subgrid=ConfidenceBoundUtilityFunctionConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension('utility_function_implementation', values=[ConfidenceBoundUtilityFunction.__name__])
    ).join(
        subgrid=RandomSearchOptimizerConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension('numeric_optimizer_implementation', values=[RandomSearchOptimizer.__name__])
    )

    _DEFAULT = Point(
        utility_function_implementation=ConfidenceBoundUtilityFunction.__name__,
        numeric_optimizer_implementation=RandomSearchOptimizer.__name__,
        confidence_bound_utility_function_config=ConfidenceBoundUtilityFunctionConfig.DEFAULT,
        random_search_optimizer_config=RandomSearchOptimizerConfig.DEFAULT,
        fraction_random_suggestions=0.5
    )
def __init__(self, adaptee: Hypergrid):
    HypergridAdapter.__init__(self, name=adaptee.name, random_state=adaptee.random_state)
    self._adaptee: Hypergrid = adaptee
    self._target: SimpleHypergrid = None
    self._forward_name_mapping = dict()
    self._backward_name_mapping = dict()

    if HypergridAdapter.is_like_simple_hypergrid(self._adaptee):
        # Need to flatten all the names.
        target_dimensions = []
        for adaptee_dimension in self._adaptee.dimensions:
            target_dimension_name = Dimension.flatten_dimension_name(adaptee_dimension.name)
            self._forward_name_mapping[adaptee_dimension.name] = target_dimension_name
            self._backward_name_mapping[target_dimension_name] = adaptee_dimension.name
            target_dimension = adaptee_dimension.copy()
            target_dimension.name = target_dimension_name
            target_dimensions.append(target_dimension)

        self._target = SimpleHypergrid(name=self._adaptee.name, dimensions=target_dimensions)
    else:
        raise TypeError(f"Cannot build CompositeToSimpleHypergridAdapter for object of type {type(self._adaptee)}.")
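# A small round-trip sketch of the two name mappings the adapter maintains. The dotted
# hierarchical name below is illustrative (joined grids produce dotted dimension names, as
# seen in the 'three_level_quadratic_config.{n}' columns elsewhere in this codebase).
hierarchical_name = 'emergency_buffer_config.log2_emergency_buffer_size'  # illustrative
flat_name = Dimension.flatten_dimension_name(hierarchical_name)
forward_name_mapping = {hierarchical_name: flat_name}
backward_name_mapping = {flat_name: hierarchical_name}
assert backward_name_mapping[forward_name_mapping[hierarchical_name]] == hierarchical_name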
def __init__(
        self,
        model_config: Point,
        input_space: Hypergrid,
        output_space: Hypergrid,
        logger: logging.Logger = None
):
    NaiveMultiObjectiveRegressionModel.__init__(
        self,
        model_type=LassoCrossValidatedRegressionModel,
        model_config=model_config,
        input_space=input_space,
        output_space=output_space,
        logger=logger
    )

    # We just need to assert that the model config belongs in lasso_cross_validated_config_store.parameter_space.
    # A more elaborate solution might be needed down the road, but for now this simple solution should suffice.
    #
    assert model_config in lasso_cross_validated_config_store.parameter_space

    for output_dimension in output_space.dimensions:
        self.logger.debug(f"output_dimension.name: {output_dimension.name}")
        lasso_model = LassoCrossValidatedRegressionModel(
            model_config=model_config,
            input_space=input_space,
            output_space=SimpleHypergrid(
                name=f"{output_dimension.name}_objective",
                dimensions=[output_dimension]
            ),
            logger=self.logger
        )
        self._regressors_by_objective_name[output_dimension.name] = lasso_model
def test_hierarchical_quadratic_cold_start(self):
    objective_function_config = objective_function_config_store.get_config_by_name('three_level_quadratic')
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)

    output_space = SimpleHypergrid(
        name="output",
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=output_space,
        objectives=[Objective(name='y', minimize=True)]
    )

    num_restarts = 2
    for restart_num in range(num_restarts):
        optimizer_config = bayesian_optimizer_config_store.default
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
        optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter = "best"
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.min_samples_to_fit = 10
        optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2

        local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config
        )
        remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=optimizer_config
        )

        for bayesian_optimizer in [local_optimizer, remote_optimizer]:
            num_guided_samples = 50
            for i in range(num_guided_samples):
                suggested_params = bayesian_optimizer.suggest()
                y = objective_function.evaluate_point(suggested_params)
                print(f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}")

                input_values_df = pd.DataFrame({
                    param_name: [param_value]
                    for param_name, param_value in suggested_params
                })
                target_values_df = y.to_dataframe()
                bayesian_optimizer.register(
                    feature_values_pandas_frame=input_values_df,
                    target_values_pandas_frame=target_values_df
                )

            best_config_point, best_objective = bayesian_optimizer.optimum(optimum_definition=OptimumDefinition.BEST_OBSERVATION)
            print(f"[Restart: {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}")
            self.validate_optima(optimizer=bayesian_optimizer)
class ConfidenceBoundUtilityFunctionConfig(metaclass=DefaultConfigMeta):

    CONFIG_SPACE = SimpleHypergrid(
        name="confidence_bound_utility_function_config",
        dimensions=[
            CategoricalDimension(
                name="utility_function_name",
                values=[
                    "lower_confidence_bound_on_improvement",
                    "upper_confidence_bound_on_improvement"
                ]
            ),
            ContinuousDimension(name="alpha", min=0.01, max=0.5)
        ]
    )

    _DEFAULT = Point(
        utility_function_name="upper_confidence_bound_on_improvement",
        alpha=0.01
    )

    @classmethod
    def create_from_config_point(cls, config_point):
        config_key_value_pairs = {param_name: value for param_name, value in config_point}
        return cls(**config_key_value_pairs)

    def __init__(
            self,
            utility_function_name=_DEFAULT.utility_function_name,
            alpha=_DEFAULT.alpha
    ):
        self.utility_function_name = utility_function_name
        self.alpha = alpha
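# A quick sketch of how such a config class is typically exercised, assuming DefaultConfigMeta
# exposes _DEFAULT as the DEFAULT class property (consistent with the .DEFAULT references above).
assert ConfidenceBoundUtilityFunctionConfig.DEFAULT in ConfidenceBoundUtilityFunctionConfig.CONFIG_SPACE
utility_function_config = ConfidenceBoundUtilityFunctionConfig.create_from_config_point(
    ConfidenceBoundUtilityFunctionConfig.CONFIG_SPACE.random()
)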
def test_pareto_frontier_volume_simple(self):
    """ A simple sanity test on the pareto frontier volume computations.
    """
    # Let's generate a pareto frontier in 2D. All points lie on the line y = 1 - x.
    x = np.linspace(start=0, stop=1, num=100)
    y = 1 - x
    pareto_df = pd.DataFrame({'x': x, 'y': y})

    optimization_problem = OptimizationProblem(
        parameter_space=None,
        objective_space=SimpleHypergrid(
            name='objectives',
            dimensions=[
                ContinuousDimension(name='x', min=0, max=1),
                ContinuousDimension(name='y', min=0, max=1)
            ]
        ),
        objectives=[
            Objective(name='x', minimize=False),
            Objective(name='y', minimize=False)
        ]
    )

    pareto_frontier = ParetoFrontier(optimization_problem, pareto_df)
    pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
    lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)
    print(lower_bound, upper_bound)

    # The true volume under the frontier is 0.5: the triangle below the line y = 1 - x.
    assert 0.49 < lower_bound < upper_bound < 0.51
class BayesianOptimizerConfig(metaclass=DefaultConfigMeta):

    CONFIG_SPACE = SimpleHypergrid(
        name="bayesian_optimizer_config",
        dimensions=[
            CategoricalDimension(
                name="surrogate_model_implementation",
                values=[HomogeneousRandomForestRegressionModel.__name__]
            ),
            CategoricalDimension(
                name="experiment_designer_implementation",
                values=[ExperimentDesigner.__name__]
            ),
            DiscreteDimension(
                name="min_samples_required_for_guided_design_of_experiments",
                min=2,
                max=10000
            )
        ]
    ).join(
        subgrid=HomogeneousRandomForestRegressionModelConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(
            name="surrogate_model_implementation",
            values=[HomogeneousRandomForestRegressionModel.__name__]
        )
    ).join(
        subgrid=ExperimentDesignerConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(
            name="experiment_designer_implementation",
            values=[ExperimentDesigner.__name__]
        )
    )

    _DEFAULT = Point(
        surrogate_model_implementation=HomogeneousRandomForestRegressionModel.__name__,
        experiment_designer_implementation=ExperimentDesigner.__name__,
        min_samples_required_for_guided_design_of_experiments=10,
        homogeneous_random_forest_regression_model_config=HomogeneousRandomForestRegressionModelConfig.DEFAULT,
        experiment_designer_config=ExperimentDesignerConfig.DEFAULT
    )
def __init__(
        self,
        model_config: Point,
        input_space: Hypergrid,
        output_space: Hypergrid,
        logger: logging.Logger = None
):
    NaiveMultiObjectiveRegressionModel.__init__(
        self,
        model_type=RegressionEnhancedRandomForestRegressionModel,
        model_config=model_config,
        input_space=input_space,
        output_space=output_space,
        logger=logger
    )

    # We just need to assert that the model config belongs in regression_enhanced_random_forest_config_store.parameter_space.
    # A more elaborate solution might be needed down the road, but for now this simple solution should suffice.
    #
    assert model_config in regression_enhanced_random_forest_config_store.parameter_space

    for output_dimension in output_space.dimensions:
        # We copy the model_config (rather than sharing it across objectives) because
        # perform_initial_random_forest_hyper_parameter_search is set to False after the initial
        # fit() call, so that subsequent .fit() calls don't pay the cost penalty of this embedded
        # hyperparameter search.
        rerf_model = RegressionEnhancedRandomForestRegressionModel(
            model_config=model_config.copy(),
            input_space=input_space,
            output_space=SimpleHypergrid(
                name=f"{output_dimension.name}_objective",
                dimensions=[output_dimension]
            ),
            logger=self.logger
        )
        self._regressors_by_objective_name[output_dimension.name] = rerf_model
def __init__(
        self,
        model_config: Point,
        input_space: Hypergrid,
        output_space: Hypergrid,
        logger=None
):
    MultiObjectiveRegressionModel.__init__(
        self,
        model_type=type(self),
        model_config=model_config,
        input_space=input_space,
        output_space=output_space
    )
    if logger is None:
        logger = create_logger("MultiObjectiveHomogeneousRandomForest")
    self.logger = logger

    # We just need to assert that the model config belongs in homogeneous_random_forest_config_store.parameter_space.
    # A more elaborate solution might be needed down the road, but for now this simple solution should suffice.
    #
    assert model_config in homogeneous_random_forest_config_store.parameter_space

    self._regressors_by_objective_name = KeyOrderedDict(
        ordered_keys=self.output_dimension_names,
        value_type=HomogeneousRandomForestRegressionModel
    )

    for output_dimension in output_space.dimensions:
        random_forest = HomogeneousRandomForestRegressionModel(
            model_config=model_config,
            input_space=input_space,
            output_space=SimpleHypergrid(
                name=f"{output_dimension.name}_objective",
                dimensions=[output_dimension]
            ),
            logger=self.logger
        )
        self._regressors_by_objective_name[output_dimension.name] = random_forest
def __init__(
        self,
        model_config: Point,
        input_space: Hypergrid,
        output_space: Hypergrid,
        logger=None
):
    NaiveMultiObjectiveRegressionModel.__init__(
        self,
        model_type=HomogeneousRandomForestRegressionModel,
        model_config=model_config,
        input_space=input_space,
        output_space=output_space,
        logger=logger
    )

    # We just need to assert that the model config belongs in homogeneous_random_forest_config_store.parameter_space.
    # A more elaborate solution might be needed down the road, but for now this simple solution should suffice.
    #
    assert model_config in homogeneous_random_forest_config_store.parameter_space

    for output_dimension in output_space.dimensions:
        random_forest = HomogeneousRandomForestRegressionModel(
            model_config=model_config,
            input_space=input_space,
            output_space=SimpleHypergrid(
                name=f"{output_dimension.name}_objective",
                dimensions=[output_dimension]
            ),
            logger=self.logger
        )
        self._regressors_by_objective_name[output_dimension.name] = random_forest
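# A standalone sketch of the per-objective decomposition used by the multi-objective models
# above; the dimension names are illustrative.
multi_objective_output_space = SimpleHypergrid(
    name="output",
    dimensions=[
        ContinuousDimension(name='y0', min=-math.inf, max=math.inf),
        ContinuousDimension(name='y1', min=-math.inf, max=math.inf)
    ]
)

# Each objective gets its own single-dimension output space, and thus its own regressor.
per_objective_spaces = [
    SimpleHypergrid(name=f"{dimension.name}_objective", dimensions=[dimension])
    for dimension in multi_objective_output_space.dimensions
]
assert [space.name for space in per_objective_spaces] == ['y0_objective', 'y1_objective']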