def setUp(self):
    """Build the emergency-buffer grids and the hierarchical channel config used by the tests."""
    # Base settings for the emergency buffer; `use_colors` gates the color subgrid below.
    self.emergency_buffer_settings = SimpleHypergrid(
        name='emergency_buffer_config',
        dimensions=[
            DiscreteDimension(name='log2_emergency_buffer_size', min=0, max=16),
            CategoricalDimension(name='use_colors', values=[True, False])
        ]
    )
    self.emergency_buffer_color = SimpleHypergrid(
        name='emergency_buffer_color',
        dimensions=[
            CategoricalDimension(name='color', values=['Maroon', 'Crimson', 'Tanager'])
        ]
    )
    # The color subgrid is only joined in when use_colors == True.
    self.emergency_buffer_settings_with_color = self.emergency_buffer_settings.join(
        subgrid=self.emergency_buffer_color,
        on_external_dimension=CategoricalDimension(name='use_colors', values=[True])
    )
    # Top-level channel config; the emergency-buffer subgrid applies only when
    # use_emergency_buffer == True.
    self.hierarchical_settings = SimpleHypergrid(
        name='communication_channel_config',
        dimensions=[
            DiscreteDimension(name='num_readers', min=1, max=64),
            DiscreteDimension(name='log2_buffer_size', min=10, max=24),
            CategoricalDimension(name='use_emergency_buffer', values=[True, False])
        ]
    ).join(
        subgrid=self.emergency_buffer_settings_with_color,
        on_external_dimension=CategoricalDimension(name='use_emergency_buffer', values=[True])
    )
def setup_method(self, method):
    """Prepare the lasso-CV model config and the polynomial input/output spaces for the tests."""
    self.model_config = lasso_cross_validated_config_store.default
    self.max_basis_function_degree = 2
    # Dimension names mirror the degree-2 polynomial basis terms (1, x1, x2, x1**2, ...).
    self.test_case_globals = {
        '2d_X_deg2_poly_input_space': SimpleHypergrid(
            name="2d_X_deg2_poly_search_domain",
            dimensions=[
                ContinuousDimension(name="1", min=0.0, max=5.0),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                ContinuousDimension(name="x1**2", min=0.0, max=25.0),
                ContinuousDimension(name="x1*x2", min=0.0, max=25.0),
                ContinuousDimension(name="x2**2", min=0.0, max=25.0)
            ]
        ),
        'categorical_deg2_poly_input_space': SimpleHypergrid(
            name="categorical_search_domain",
            dimensions=[
                CategoricalDimension(name='x0', values=['a', 'b', 'c']),
                ContinuousDimension(name="1", min=0.0, max=5.0),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                ContinuousDimension(name="x1**2", min=0.0, max=25.0),
                ContinuousDimension(name="x1*x2", min=0.0, max=25.0),
                ContinuousDimension(name="x2**2", min=0.0, max=25.0),
                CategoricalDimension(name='i0', values=['-5', '5'])
            ]
        ),
        'degree2_output_space': SimpleHypergrid(
            name="degree2_polynomial",
            dimensions=[
                ContinuousDimension(name="degree2_polynomial_y", min=-10**15, max=10**15)
            ]
        )
    }
def setUpClass(cls) -> None:
    """Create one flat and one hierarchical hypergrid shared by all tests in the class."""
    cls.simple_hypergrid = SimpleHypergrid(
        name='simple_adaptee',
        dimensions=[
            CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 1]),
            DiscreteDimension(name='one_to_ten', min=1, max=10),
            ContinuousDimension(name='zero_to_one', min=0, max=1),
            OrdinalDimension(name='ordinal_mixed_types', ordered_values=[1, False, 'two'])
        ]
    )
    # Same dimension names reused in the nested grid, joined on a mixed-type
    # categorical so the hierarchy is only active when the value is True.
    cls.hierarchical_hypergrid = SimpleHypergrid(
        name='hierarchical_adaptee',
        dimensions=[
            CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 3]),
            DiscreteDimension(name='one_to_ten', min=1, max=10),
            ContinuousDimension(name='zero_to_one', min=0, max=1),
            OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, False, 'two'])
        ]
    ).join(
        subgrid=SimpleHypergrid(
            name="nested_grid",
            dimensions=[
                CategoricalDimension(name='categorical_mixed_types', values=['red', False, True, 3]),
                DiscreteDimension(name='one_to_ten', min=1, max=10),
                ContinuousDimension(name='zero_to_one', min=0, max=1),
                OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, 'two', False])
            ]
        ),
        on_external_dimension=CategoricalDimension("categorical_mixed_types", values=[True])
    )
class ExperimentDesignerConfig(metaclass=DefaultConfigMeta):
    """Config space for the experiment designer: utility function, numeric optimizer,
    and the fraction of purely random suggestions."""

    CONFIG_SPACE = SimpleHypergrid(
        name='experiment_designer_config',
        dimensions=[
            CategoricalDimension('utility_function_implementation',
                                 values=[ConfidenceBoundUtilityFunction.__name__]),
            CategoricalDimension('numeric_optimizer_implementation',
                                 values=[RandomSearchOptimizer.__name__]),
            ContinuousDimension('fraction_random_suggestions', min=0, max=1)
        ]
    ).join(
        # Utility-function config applies only when that implementation is selected.
        subgrid=ConfidenceBoundUtilityFunctionConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(
            'utility_function_implementation',
            values=[ConfidenceBoundUtilityFunction.__name__]
        )
    ).join(
        subgrid=RandomSearchOptimizerConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(
            'numeric_optimizer_implementation',
            values=[RandomSearchOptimizer.__name__]
        )
    )

    _DEFAULT = Point(
        utility_function_implementation=ConfidenceBoundUtilityFunction.__name__,
        numeric_optimizer_implementation=RandomSearchOptimizer.__name__,
        confidence_bound_utility_function_config=ConfidenceBoundUtilityFunctionConfig.DEFAULT,
        random_search_optimizer_config=RandomSearchOptimizerConfig.DEFAULT,
        fraction_random_suggestions=0.5
    )
class BayesianOptimizerConfig(metaclass=DefaultConfigMeta):
    """Top-level Bayesian optimizer config: surrogate model, experiment designer,
    and the sample count at which guided design of experiments begins."""

    CONFIG_SPACE = SimpleHypergrid(
        name="bayesian_optimizer_config",
        dimensions=[
            CategoricalDimension(name="surrogate_model_implementation",
                                 values=[HomogeneousRandomForestRegressionModel.__name__]),
            CategoricalDimension(name="experiment_designer_implementation",
                                 values=[ExperimentDesigner.__name__]),
            DiscreteDimension(name="min_samples_required_for_guided_design_of_experiments",
                              min=2, max=10000)
        ]
    ).join(
        # Surrogate-model subgrid is active only for the matching implementation choice.
        subgrid=HomogeneousRandomForestRegressionModelConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(
            name="surrogate_model_implementation",
            values=[HomogeneousRandomForestRegressionModel.__name__]
        )
    ).join(
        subgrid=ExperimentDesignerConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(
            name="experiment_designer_implementation",
            values=[ExperimentDesigner.__name__]
        )
    )

    _DEFAULT = Point(
        surrogate_model_implementation=HomogeneousRandomForestRegressionModel.__name__,
        experiment_designer_implementation=ExperimentDesigner.__name__,
        min_samples_required_for_guided_design_of_experiments=10,
        homogeneous_random_forest_regression_model_config=HomogeneousRandomForestRegressionModelConfig.DEFAULT,
        experiment_designer_config=ExperimentDesignerConfig.DEFAULT
    )
def __init__(self, objective_function_config: Point):
    """Build a hierarchical objective of several nested polynomials.

    :param objective_function_config: config Point carrying a valid
        polynomial_objective_config and num_nested_polynomials.
    """
    assert objective_function_config.polynomial_objective_config in PolynomialObjective.CONFIG_SPACE
    ObjectiveFunctionBase.__init__(self, objective_function_config)

    # Let's start building the parameter space for it. The top-level dimension
    # selects which nested polynomial to evaluate.
    # Fixed: was `[id for id in range(...)]`, which shadowed the builtin `id`;
    # `list(range(...))` produces the identical value list.
    self._parameter_space = SimpleHypergrid(
        name="domain",
        dimensions=[
            CategoricalDimension(
                name="polynomial_id",
                values=list(range(self.objective_function_config.num_nested_polynomials))
            )
        ]
    )

    polynomial_objective_config = self.objective_function_config.polynomial_objective_config
    self._polynomial_objective_config = polynomial_objective_config
    self._polynomials = []

    # Let's create the required number of polynomials, one subgrid each.
    for i in range(self.objective_function_config.num_nested_polynomials):
        polynomial_objective_config.seed += i + 1  # Change the seed so that it's still effective but also reproducible.
        polynomial = PolynomialObjectiveWrapper(polynomial_objective_config, domain_name=f"domain_{i}")
        self._polynomials.append(polynomial)
        # NOTE(review): join()'s return value is discarded here, unlike elsewhere
        # in this codebase — this relies on join mutating the hypergrid in place.
        # TODO: confirm against SimpleHypergrid.join's contract.
        self._parameter_space.join(
            subgrid=polynomial.parameter_space,
            on_external_dimension=CategoricalDimension(name="polynomial_id", values=[i])
        )

    self._output_space = SimpleHypergrid(
        name='output_space',
        dimensions=[
            ContinuousDimension(name='y', min=-math.inf, max=math.inf)
        ]
    )
def setUp(self):
    """Create the default model config and the shared search/output spaces for the tests."""
    self.model_config = RegressionEnhancedRandomForestRegressionModelConfig.DEFAULT
    self.test_case_globals = {
        '2d_X_input_space': SimpleHypergrid(
            name="2d_X_search_domain",
            dimensions=[
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0)
            ]
        ),
        'categorical_input_space': SimpleHypergrid(
            name="categorical_search_domain",
            dimensions=[
                CategoricalDimension(name='x0', values=['a', 'b', 'c']),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                CategoricalDimension(name='i0', values=['-5', '5'])
            ]
        ),
        'categorical_hierarchical_input_space': SimpleHypergrid(
            name="categorical_search_domain",
            dimensions=[
                CategoricalDimension(name='x0', values=['a', 'b', 'c']),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                CategoricalDimension(name='i0', values=['-5', '5'])
            ]
        ),
        'output_space': SimpleHypergrid(
            name="degree2_polynomial",
            dimensions=[
                ContinuousDimension(name="degree2_polynomial_y", min=-10**15, max=10**15)
            ]
        )
    }
def test_categorical_dimension(self):
    """Round-trip a mixed-type CategoricalDimension through the monitoring-service codec."""
    original = CategoricalDimension(
        name='categorical',
        values=[0, 1, True, False, "red", "green", "blue", 3.14, 7.5]
    )
    encoded = OptimizerMonitoringServiceEncoder.encode_categorical_dimension(original)
    decoded = OptimizerMonitoringServiceDecoder.decode_categorical_dimension(encoded)
    assert isinstance(encoded, OptimizerMonitoringService_pb2.CategoricalDimension)
    assert original == decoded
class ConfidenceBoundUtilityFunctionConfig(metaclass=DefaultConfigMeta):
    """Configuration for the confidence-bound utility function."""

    CONFIG_SPACE = SimpleHypergrid(
        name="confidence_bound_utility_function_config",
        dimensions=[
            CategoricalDimension(
                name="utility_function_name",
                values=[
                    "lower_confidence_bound_on_improvement",
                    "upper_confidence_bound_on_improvement"
                ]
            ),
            ContinuousDimension(name="alpha", min=0.01, max=0.5)
        ]
    )
    _DEFAULT = Point(
        utility_function_name="upper_confidence_bound_on_improvement",
        alpha=0.01
    )

    @classmethod
    def create_from_config_point(cls, config_point):
        # A config point iterates as (name, value) pairs, so it maps directly
        # onto the constructor's keyword arguments.
        return cls(**dict(config_point))

    def __init__(self,
                 utility_function_name=_DEFAULT.utility_function_name,
                 alpha=_DEFAULT.alpha):
        self.utility_function_name = utility_function_name
        self.alpha = alpha
def test_that_getitem_returns_dimensions(self):
    """
    Tests if we can use the __getitem__ operator to retrieve a dimension.
    :return:
    """
    expected_dimension = CategoricalDimension(
        name='cache_implementation_name',
        values=['lru_cache', 'associative_cache']
    )
    self.assertTrue(self.cache_param_space["cache_implementation_name"] == expected_dimension)
    # __getitem__ also works through nested subgrids.
    nested_dimension = self.cache_param_space["associative_cache_config"]["lowest_bits"]["num_bits"]
    self.assertTrue(nested_dimension == self.lowest_bits_param_space["num_bits"])
def test_that_collision_throws(self):
    """ Test that if we try to join on a subgrid that has the same name as an existing dimension, we throw.

    This is because the __getitem__ can return either a dimension or a subgrid, so their names cannot collide.

    :return:
    """
    with self.assertRaises(ValueError):
        # The subgrid's name collides with the 'associative_cache_config' dimension.
        SimpleHypergrid(
            name="collisions",
            dimensions=[
                CategoricalDimension(name="associative_cache_config", values=[True, False]),
                CategoricalDimension(name='cache_implementation_name',
                                     values=['lru_cache', 'associative_cache'])
            ]
        ).join(
            subgrid=self.associative_cache_implementation_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name',
                                                       values=['associative_cache'])
        )
def __init__(
        self,
        parameter_space: Hypergrid,
        objective_space: Hypergrid,
        objectives: List[Objective],
        context_space: Hypergrid = None,
):
    """Define an optimization problem over parameters (and optional context).

    :param parameter_space: space of tunable parameters.
    :param objective_space: space containing one dimension per objective; none may be categorical.
    :param objectives: which objective dimensions to optimize, and in which direction.
    :param context_space: optional space of contextual features.
    """
    self.parameter_space = parameter_space
    self.context_space = context_space

    # Objectives must be numeric, and each declared objective must exist in the objective space.
    assert not any(
        isinstance(dimension, CategoricalDimension)
        for dimension in objective_space.dimensions
    ), "Objective dimension cannot be Categorical."
    objective_dimension_names = {dimension.name for dimension in objective_space.dimensions}
    assert all(
        objective.name in objective_dimension_names for objective in objectives
    ), "All objectives must belong to objective space."
    self.objective_space = objective_space

    # We need to keep track of which objective to minimize, and which one to maximize.
    self.objectives = objectives
    self.objective_names = [objective.name for objective in self.objectives]

    # Fit functions / surrogate models will be fed features consisting of both context
    # and parameters, so the feature space joins the two under a flag dimension.
    has_context = self.context_space is not None
    self.feature_space = SimpleHypergrid(
        name="features",
        dimensions=[CategoricalDimension(name="contains_context", values=[has_context])]
    ).join(
        subgrid=self.parameter_space,
        on_external_dimension=CategoricalDimension(name="contains_context", values=[has_context])
    )
    if has_context:
        self.feature_space = self.feature_space.join(
            subgrid=self.context_space,
            on_external_dimension=CategoricalDimension(name="contains_context", values=[True])
        )
class HomogeneousRandomForestRegressionModelConfig(RegressionModelConfig):
    """Config for a homogeneous random forest whose estimators are decision-tree regressors."""

    CONFIG_SPACE = SimpleHypergrid(
        name="homogeneous_random_forest_regression_model_config",
        dimensions=[
            DiscreteDimension(name="n_estimators", min=1, max=100),
            # Fractions are half-open (0, 1]: an estimator must see at least one feature/sample.
            ContinuousDimension(name="features_fraction_per_estimator", min=0, max=1, include_min=False, include_max=True),
            ContinuousDimension(name="samples_fraction_per_estimator", min=0, max=1, include_min=False, include_max=True),
            CategoricalDimension(name="regressor_implementation", values=[DecisionTreeRegressionModel.__name__]),
        ]
    ).join(
        subgrid=DecisionTreeRegressionModelConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(name="regressor_implementation",
                                                   values=[DecisionTreeRegressionModel.__name__])
    )

    _DEFAULT = Point(
        n_estimators=5,
        features_fraction_per_estimator=1,
        samples_fraction_per_estimator=0.7,
        regressor_implementation=DecisionTreeRegressionModel.__name__,
        decision_tree_regression_model_config=DecisionTreeRegressionModelConfig.DEFAULT
    )

    def __init__(
            self,
            n_estimators=_DEFAULT.n_estimators,
            features_fraction_per_estimator=_DEFAULT.features_fraction_per_estimator,
            samples_fraction_per_estimator=_DEFAULT.samples_fraction_per_estimator,
            regressor_implementation=_DEFAULT.regressor_implementation,
            # Fixed: the annotation was `Point()` — a throwaway instance created at
            # class-definition time, not a type. Annotate with the type `Point` instead.
            decision_tree_regression_model_config: Point = _DEFAULT.decision_tree_regression_model_config
    ):
        self.n_estimators = n_estimators
        self.features_fraction_per_estimator = features_fraction_per_estimator
        self.samples_fraction_per_estimator = samples_fraction_per_estimator
        self.regressor_implementation = regressor_implementation
        # Only the decision-tree regressor is supported as the base estimator.
        assert regressor_implementation == DecisionTreeRegressionModel.__name__
        self.decision_tree_regression_model_config = DecisionTreeRegressionModelConfig.create_from_config_point(decision_tree_regression_model_config)

    @classmethod
    def contains(cls, config):  # pylint: disable=unused-argument
        return True  # TODO: see if you can remove this class entirely.
def test_randomly_generating_team_member(self):
    """A random draw from a one-dimensional categorical grid must land inside that grid."""
    self.logger.info("Starting first check in test.")
    team_space = SimpleHypergrid(
        name="mlos_team",
        dimensions=[
            CategoricalDimension(name="member",
                                 values=["Ed", "Greg", "Sergiy", "Yaser", "Adam", "Zack"])
        ]
    )
    sampled_member = team_space.random()
    assert sampled_member in team_space
def decode_categorical_dimension(
        serialized: OptimizerMonitoringService_pb2.CategoricalDimension
) -> CategoricalDimension:
    """Rebuild a CategoricalDimension from its protobuf representation."""
    assert isinstance(serialized, OptimizerMonitoringService_pb2.CategoricalDimension)
    # Each protobuf value is decoded back to its Python primitive before reassembly.
    decoded_values = [
        OptimizerMonitoringServiceDecoder.decode_primitive_value(raw_value)
        for raw_value in serialized.Values
    ]
    return CategoricalDimension(name=serialized.Name, values=decoded_values)
def setup_class(cls) -> None:
    """Build the simple, unbalanced-hierarchical, and balanced-hierarchical grids for the tests."""
    cls.simple_hypergrid = SimpleHypergrid(
        name='simple_adaptee',
        dimensions=[
            CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 5]),
            DiscreteDimension(name='one_to_ten', min=1, max=10),
            ContinuousDimension(name='z_one', min=-1, max=2),
            ContinuousDimension(name='z_two', min=-2, max=1),
            ContinuousDimension(name='z_3', min=-2, max=-1),
            OrdinalDimension(name='ordinal_mixed_types', ordered_values=[1, False, 'two'])
        ]
    )
    # "Unbalanced": only one branch of the joined dimension carries a subgrid.
    cls.unbalanced_hierarchical_hypergrid = SimpleHypergrid(
        name='hierarchical_adaptee',
        dimensions=[
            CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 3]),
            DiscreteDimension(name='one_to_ten', min=1, max=10),
            ContinuousDimension(name='x1', min=-1, max=1),
            ContinuousDimension(name='x2', min=-1, max=1),
            OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, False, 'two'])
        ]
    ).join(
        subgrid=SimpleHypergrid(
            name="nested_grid",
            dimensions=[
                CategoricalDimension(name='categorical_mixed_types', values=['red', False, True, 3]),
                DiscreteDimension(name='one_to_ten', min=1, max=10),
                ContinuousDimension(name='x1', min=-1, max=1),
                ContinuousDimension(name='x2', min=-1, max=1),
                OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, 'two', False])
            ]
        ),
        on_external_dimension=CategoricalDimension("categorical_mixed_types", values=[True])
    )
    cls.balanced_hierarchical_hypergrid = ThreeLevelQuadratic().parameter_space
def test_optimization_problem_none_context(self):
    """Round-trip an OptimizationProblem with no context space through the service codec."""
    parameter_space = SimpleHypergrid(
        name="test",
        dimensions=[
            ContinuousDimension(name="x", min=0, max=1),
            OrdinalDimension(name="y", ordered_values=[1, 2, 3, 5, 10]),
            CategoricalDimension(name="y2", values=[True, False])
        ]
    )
    objective_space = SimpleHypergrid(
        name="z",
        dimensions=[
            ContinuousDimension(name="z\n special", min=-50, max=-49),
            ContinuousDimension(name="z1", min=-1, max=1)
        ]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name="z\n special", minimize=True),
            Objective(name="z1", minimize=False)
        ]
    )

    encoded_problem = OptimizerServiceEncoder.encode_optimization_problem(optimization_problem)
    decoded_problem = OptimizerServiceDecoder.decode_optimization_problem(encoded_problem)

    print(f"Context space is: {decoded_problem.context_space}")
    assert decoded_problem.context_space is None

    # Each space and its decoded counterpart must accept one another's random samples.
    # Parameter Space
    for _ in range(1000):
        assert decoded_problem.parameter_space.random() in parameter_space
        assert parameter_space.random() in decoded_problem.parameter_space

    # Output Space
    for _ in range(1000):
        assert decoded_problem.objective_space.random() in objective_space
        assert objective_space.random() in decoded_problem.objective_space

    # Feature Space
    for _ in range(1000):
        assert decoded_problem.feature_space.random() in optimization_problem.feature_space
        assert optimization_problem.feature_space.random() in decoded_problem.feature_space
class SimpleBayesianOptimizerConfig(metaclass=DefaultConfigMeta):
    """Config for the simple Bayesian optimizer: acquisition function and its parameters."""

    CONFIG_SPACE = SimpleHypergrid(
        name="SimpleBayesianOptimizerConfig",
        dimensions=[
            CategoricalDimension(name='utility_function', values=['ucb', 'ei', 'poi']),
            ContinuousDimension(name='kappa', min=-5, max=5),
            ContinuousDimension(name='xi', min=-5, max=5)
        ]
    )
    _DEFAULT = Point(utility_function='ucb', kappa=3, xi=1)

    @classmethod
    def contains(cls, config):
        # Only instances of this class can be projected into the config space.
        if not isinstance(config, cls):
            return False
        return Point(
            utility_function=config.utility_function,
            kappa=config.kappa,
            xi=config.xi
        ) in cls.CONFIG_SPACE

    @classmethod
    def create_from_config_point(cls, config_point):
        assert config_point in cls.CONFIG_SPACE
        return cls(
            utility_function=config_point.utility_function,
            kappa=config_point.kappa,
            xi=config_point.xi
        )

    def __init__(self, utility_function=None, kappa=None, xi=None):
        # Any argument left as None falls back to the class default.
        self.utility_function = self._DEFAULT.utility_function if utility_function is None else utility_function
        self.kappa = self._DEFAULT.kappa if kappa is None else kappa
        self.xi = self._DEFAULT.xi if xi is None else xi

    def to_dict(self):
        return {
            'utility_function': self.utility_function,
            'kappa': self.kappa,
            'xi': self.xi
        }
def setup_method(self, method):
    """Create the default model config and the shared search/output spaces for the tests."""
    self.model_config = regression_enhanced_random_forest_config_store.default
    self.test_case_globals = {
        '2d_X_input_space': SimpleHypergrid(
            name="2d_X_search_domain",
            dimensions=[
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0)
            ]
        ),
        'categorical_input_space': SimpleHypergrid(
            name="categorical_search_domain",
            dimensions=[
                CategoricalDimension(name='x0', values=['a', 'b', 'c']),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                CategoricalDimension(name='i0', values=['-5', '5'])
            ]
        ),
        'categorical_hierarchical_input_space': SimpleHypergrid(
            name="categorical_search_domain",
            dimensions=[
                CategoricalDimension(name='x0', values=['a', 'b', 'c']),
                ContinuousDimension(name="x1", min=0.0, max=5.0),
                ContinuousDimension(name="x2", min=0.0, max=5.0),
                CategoricalDimension(name='i0', values=['-5', '5'])
            ]
        ),
        'output_space': SimpleHypergrid(
            name="degree2_polynomial",
            dimensions=[
                ContinuousDimension(name="degree2_polynomial_y", min=-10 ** 15, max=10 ** 15)
            ]
        )
    }
# Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # from mlos.Spaces import Point, SimpleHypergrid, CategoricalDimension from mlos.Spaces.Configs import ComponentConfigStore from mlos.OptimizerEvaluationTools.SyntheticFunctions.PolynomialObjective import PolynomialObjective from mlos.OptimizerEvaluationTools.SyntheticFunctions.ThreeLevelQuadratic import ThreeLevelQuadratic from mlos.OptimizerEvaluationTools.SyntheticFunctions.Flower import Flower objective_function_config_store = ComponentConfigStore( parameter_space=SimpleHypergrid( name="objective_function", dimensions=[ CategoricalDimension(name="implementation", values=[ PolynomialObjective.__name__, ThreeLevelQuadratic.__name__, Flower.__name__, ]) ]).join(subgrid=PolynomialObjective.CONFIG_SPACE, on_external_dimension=CategoricalDimension( name="implementation", values=[PolynomialObjective.__name__])), default=Point( implementation=PolynomialObjective.__name__, # TODO: move polynomial objective to config store polynomial_objective_config=PolynomialObjective._DEFAULT, # pylint: disable=protected-access, )) objective_function_config_store.add_config_by_name( config_name="three_level_quadratic", config_point=Point(implementation=ThreeLevelQuadratic.__name__))
class SklearnRandomForestRegressionModelConfig(metaclass=DefaultConfigMeta):
    # Exposes sklearn RandomForestRegressor hyperparameters as a hypergrid so the
    # optimizer can search over them. Some integer params overload 0 to mean None
    # (see the *_value properties at the bottom).

    class MaxFeatures(Enum):
        """
        The number of features to consider when looking for the best split
            - If "auto", then `max_features=n_features`.
            - If "sqrt", then `max_features=sqrt(n_features)`.
            - If "log2", then `max_features=log2(n_features)`.
            - If None, then `max_features=n_features`.
        """
        AUTO = "auto"
        SQRT = "sqrt"
        LOG2 = "log2"

    class Criterion(Enum):
        """
        The function to measure the quality of a split.
        Supported criteria are "mse" for the mean squared error, which is equal to variance
        reduction as feature selection criterion, and "mae" for the mean absolute error.
        """
        MSE = "mse"
        MAE = "mae"

    # Searchable hyperparameter space.
    # NOTE(review): min_samples_split, min_samples_leaf, min_impurity_decrease,
    # ccp_alpha and max_samples are modeled as ContinuousDimension — confirm the
    # fractional interpretation matches sklearn's parameter typing.
    CONFIG_SPACE = SimpleHypergrid(
        name="sklearn_random_forest_regression_model_config",
        dimensions=[
            DiscreteDimension(name="n_estimators", min=1, max=2**10),
            CategoricalDimension(name="criterion", values=[criterion.value for criterion in Criterion]),
            DiscreteDimension(name="max_depth", min=0, max=2**10),
            ContinuousDimension(name="min_samples_split", min=2, max=2**10),
            ContinuousDimension(name="min_samples_leaf", min=1, max=2**10),
            ContinuousDimension(name="min_weight_fraction_leaf", min=0, max=0.5),
            CategoricalDimension(name="max_features", values=[max_feature.value for max_feature in MaxFeatures]),
            DiscreteDimension(name="max_leaf_nodes", min=0, max=2**10),
            ContinuousDimension(name="min_impurity_decrease", min=0, max=2**10),
            CategoricalDimension(name="bootstrap", values=[False, True]),
            CategoricalDimension(name="oob_score", values=[False, True]),
            DiscreteDimension(name="n_jobs", min=1, max=2**10),
            CategoricalDimension(name="warm_start", values=[False, True]),
            ContinuousDimension(name="ccp_alpha", min=0, max=2**10),
            ContinuousDimension(name="max_samples", min=0, max=2**10)
        ]
    )

    _DEFAULT = Point(
        n_estimators=100,
        criterion=Criterion.MSE.value,
        max_depth=0,  # overloading 0 as None to deal with sklearn param type interpretation
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_features=MaxFeatures.AUTO.value,
        max_leaf_nodes=0,  # overloading 0 as None to deal with sklearn param type interpretation
        min_impurity_decrease=0,
        bootstrap=True,
        oob_score=False,
        n_jobs=1,
        warm_start=False,
        ccp_alpha=0,
        max_samples=0
    )

    @classmethod
    def contains(cls, config):
        # Project the config object's attributes into a Point and test membership
        # against the hypergrid.
        return Point(
            n_estimators=config.n_estimators,
            criterion=config.criterion,
            max_depth=config.max_depth,
            min_samples_split=config.min_samples_split,
            min_samples_leaf=config.min_samples_leaf,
            min_weight_fraction_leaf=config.min_weight_fraction_leaf,
            max_features=config.max_features,
            max_leaf_nodes=config.max_leaf_nodes,
            min_impurity_decrease=config.min_impurity_decrease,
            bootstrap=config.bootstrap,
            oob_score=config.oob_score,
            n_jobs=config.n_jobs,
            warm_start=config.warm_start,
            ccp_alpha=config.ccp_alpha,
            max_samples=config.max_samples
        ) in cls.CONFIG_SPACE

    @classmethod
    def create_from_config_point(cls, config_point):
        # A config point iterates as (name, value) pairs matching the constructor kwargs.
        assert cls.contains(config_point)
        config_key_value_pairs = {param_name: value for param_name, value in config_point}
        return cls(**config_key_value_pairs)

    def __init__(self,
                 n_estimators=_DEFAULT.n_estimators,
                 criterion=_DEFAULT.criterion,
                 max_depth=_DEFAULT.max_depth,
                 min_samples_split=_DEFAULT.min_samples_split,
                 min_samples_leaf=_DEFAULT.min_samples_leaf,
                 min_weight_fraction_leaf=_DEFAULT.min_weight_fraction_leaf,
                 max_features=_DEFAULT.max_features,
                 max_leaf_nodes=_DEFAULT.max_leaf_nodes,
                 min_impurity_decrease=_DEFAULT.min_impurity_decrease,
                 bootstrap=_DEFAULT.bootstrap,
                 oob_score=_DEFAULT.oob_score,
                 n_jobs=_DEFAULT.n_jobs,
                 warm_start=_DEFAULT.warm_start,
                 ccp_alpha=_DEFAULT.ccp_alpha,
                 max_samples=_DEFAULT.max_samples):
        """
        Random Forest parameters:
        :param n_estimators: The number of trees in the forest.
        :param criterion: The function to measure the quality of a split. Supported criteria
            are "mse" for the mean squared error, which is equal to variance reduction as
            feature selection criterion, and "mae" for the mean absolute error.
        :param max_depth: The maximum depth of the tree. If None, then nodes are expanded
            until all leaves are pure or until all leaves contain less than
            min_samples_split samples.
        :param min_samples_split: The minimum number of samples required to split an internal node
        :param min_samples_leaf: The minimum number of samples required to be at a leaf node.
            A split point at any depth will only be considered if it leaves at least
            ``min_samples_leaf`` training samples in each of the left and right branches.
            This may have the effect of smoothing the model, especially in regression.
        :param min_weight_fraction_leaf: The minimum weighted fraction of the sum total of
            weights (of all the input samples) required to be at a leaf node.
        :param max_features: The number of features to consider when looking for the best split
            - If "auto", then `max_features=n_features`.
            - If "sqrt", then `max_features=sqrt(n_features)`.
            - If "log2", then `max_features=log2(n_features)`.
            - If None, then `max_features=n_features`.
        :param max_leaf_nodes: Grow trees with ``max_leaf_nodes`` in best-first fashion.
        :param min_impurity_decrease: A node will be split if this split induces a decrease
            of the impurity greater than or equal to this value.
        :param bootstrap: Whether bootstrap samples are used when building trees. If False,
            the whole dataset is used to build each tree.
        :param oob_score: Whether to use out-of-bag samples to estimate the R^2 on unseen data.
        :param n_jobs: The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,
            :meth:`decision_path` and :meth:`apply` are all parallelized over the trees.
        :param warm_start: When set to ``True``, reuse the solution of the previous call to
            fit and add more estimators to the ensemble, otherwise, just fit a whole new forest.
        :param ccp_alpha: Complexity parameter used for Minimal Cost-Complexity Pruning.
            The subtree with the largest cost complexity that is smaller than ``ccp_alpha``
            will be chosen. .. versionadded:: 0.22
        :param max_samples: If bootstrap is True, the number of samples to draw from X to
            train each base estimator.
        """
        self.n_estimators = n_estimators
        self.criterion = criterion
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_features = max_features
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.bootstrap = bootstrap
        self.oob_score = oob_score
        self.n_jobs = n_jobs
        self.warm_start = warm_start
        self.ccp_alpha = ccp_alpha
        self.max_samples = max_samples

    # sklearn random forest regressor interprets max_depth = None differently than an int value
    # so mapping max_depth=0 to None here
    @property
    def max_depth_value(self):
        if self.max_depth == 0:
            return None
        return self.max_depth

    @property
    # similar mapping here as for max_depth
    def max_leaf_nodes_value(self):
        if self.max_leaf_nodes == 0 or self.max_leaf_nodes == 1:
            return None
        return self.max_leaf_nodes

    @property
    # similar mapping here as for max_depth
    def max_sample_value(self):
        if self.max_samples == 0:
            return None
        return self.max_samples
from mlos.Spaces.Configs.ComponentConfigStore import ComponentConfigStore from .UtilityFunctionOptimizers.RandomSearchOptimizer import RandomSearchOptimizer, random_search_optimizer_config_store from .UtilityFunctionOptimizers.GlowWormSwarmOptimizer import GlowWormSwarmOptimizer, glow_worm_swarm_optimizer_config_store from .UtilityFunctions.ConfidenceBoundUtilityFunction import ConfidenceBoundUtilityFunction, confidence_bound_utility_function_config_store from .UtilityFunctions.MultiObjectiveProbabilityOfImprovementUtilityFunction import MultiObjectiveProbabilityOfImprovementUtilityFunction,\ multi_objective_probability_of_improvement_utility_function_config_store experiment_designer_config_store = ComponentConfigStore( parameter_space=SimpleHypergrid( name='experiment_designer_config', dimensions=[ CategoricalDimension( 'utility_function_implementation', values=[ ConfidenceBoundUtilityFunction.__name__, MultiObjectiveProbabilityOfImprovementUtilityFunction. __name__ ]), CategoricalDimension('numeric_optimizer_implementation', values=[ RandomSearchOptimizer.__name__, GlowWormSwarmOptimizer.__name__ ]), ContinuousDimension('fraction_random_suggestions', min=0, max=1) ]).join(subgrid=confidence_bound_utility_function_config_store. parameter_space, on_external_dimension=CategoricalDimension( 'utility_function_implementation', values=[ConfidenceBoundUtilityFunction.__name__])). join(
# Licensed under the MIT License. # from mlos.Spaces import SimpleHypergrid, DiscreteDimension, CategoricalDimension, Point from mlos.Spaces.Configs.ComponentConfigStore import ComponentConfigStore from mlos.Optimizers.ExperimentDesigner.ExperimentDesigner import ExperimentDesigner, experiment_designer_config_store from mlos.Optimizers.RegressionModels.HomogeneousRandomForestConfigStore import homogeneous_random_forest_config_store from mlos.Optimizers.RegressionModels.HomogeneousRandomForestRegressionModel import HomogeneousRandomForestRegressionModel bayesian_optimizer_config_store = ComponentConfigStore( parameter_space=SimpleHypergrid( name="bayesian_optimizer_config", dimensions=[ CategoricalDimension( name="surrogate_model_implementation", values=[ HomogeneousRandomForestRegressionModel.__name__, ]), CategoricalDimension(name="experiment_designer_implementation", values=[ExperimentDesigner.__name__]), DiscreteDimension( name="min_samples_required_for_guided_design_of_experiments", min=2, max=10000) ]).join( subgrid=homogeneous_random_forest_config_store.parameter_space, on_external_dimension=CategoricalDimension( name="surrogate_model_implementation", values=[ HomogeneousRandomForestRegressionModel.__name__ ])).join(
class SklearnLassoRegressionModelConfig(metaclass=DefaultConfigMeta):
    """Configuration wrapper around the hyperparameters of sklearn's Lasso regressor."""

    class Selection(Enum):
        """How sklearn's Lasso selects the coefficient to update each iteration.

        From https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html:
        setting 'random' updates a random coefficient every iteration rather than
        looping over features sequentially, which often leads to significantly
        faster convergence, especially when tol is higher than 1e-4.
        """
        CYCLIC = 'cyclic'
        RANDOM = 'random'

    # Search space covering the tunable parameters exposed by __init__.
    # random_state is deliberately not part of the space - it is not tuned.
    CONFIG_SPACE = SimpleHypergrid(
        name="sklearn_lasso_regression_model_config",
        dimensions=[
            ContinuousDimension(name="alpha", min=0, max=2**16),
            CategoricalDimension(name="fit_intercept", values=[False, True]),
            CategoricalDimension(name="normalize", values=[False, True]),
            CategoricalDimension(name="precompute", values=[False, True]),
            CategoricalDimension(name="copy_x", values=[False, True]),
            DiscreteDimension(name="max_iter", min=0, max=10**5),
            ContinuousDimension(name="tol", min=0, max=2**10),
            CategoricalDimension(name="warm_start", values=[False, True]),
            CategoricalDimension(name="positive", values=[False, True]),
            CategoricalDimension(name="selection", values=[member.value for member in Selection]),
        ])

    _DEFAULT = Point(
        selection=Selection.CYCLIC.value,
        alpha=1.0,
        fit_intercept=False,
        normalize=False,
        # sklearn accepts str, bool, or array-like for precompute; we restrict
        # ourselves to the bool default and exclude the other options.
        precompute=False,
        copy_x=True,
        max_iter=2000,
        tol=10**-4,
        warm_start=False,
        positive=False)

    @classmethod
    def contains(cls, config):
        """Return True iff every tunable value in config lies inside CONFIG_SPACE."""
        candidate = Point(
            alpha=config.alpha,
            fit_intercept=config.fit_intercept,
            normalize=config.normalize,
            precompute=config.precompute,
            copy_x=config.copy_x,
            max_iter=config.max_iter,
            tol=config.tol,
            warm_start=config.warm_start,
            positive=config.positive,
            selection=config.selection)
        return candidate in cls.CONFIG_SPACE

    @classmethod
    def create_from_config_point(cls, config_point):
        """Instantiate the config from a Point that lies inside CONFIG_SPACE."""
        assert cls.contains(config_point)
        # Iterating a Point yields (parameter_name, value) pairs.
        return cls(**dict(config_point))

    def __init__(
            self,
            alpha=_DEFAULT.alpha,
            fit_intercept=_DEFAULT.fit_intercept,
            normalize=_DEFAULT.normalize,
            precompute=_DEFAULT.precompute,
            copy_x=_DEFAULT.copy_x,
            max_iter=_DEFAULT.max_iter,
            tol=_DEFAULT.tol,
            warm_start=_DEFAULT.warm_start,
            positive=_DEFAULT.positive,
            random_state=None,
            selection=_DEFAULT.selection):
        """
        Lasso parameters:
        :param alpha: Constant that multiplies the L1 term. Defaults to 1.0.
        :param fit_intercept: Whether to calculate the intercept for this model.
        :param normalize: Ignored when ``fit_intercept`` is False. If True, the
            regressors X will be normalized before regression by subtracting the
            mean and dividing by the l2-norm.
        :param precompute: Whether to use a precomputed Gram matrix to speed up
            calculations. If set to ``'auto'`` let us decide.
        :param copy_x: If ``True``, X will be copied; else, it may be overwritten.
        :param max_iter: The maximum number of iterations.
        :param tol: The tolerance for the optimization: if the updates are smaller
            than ``tol``, the optimization code checks the dual gap for optimality
            and continues until it is smaller than ``tol``.
        :param warm_start: When set to True, reuse the solution of the previous
            call to fit as initialization; otherwise, just erase the previous
            solution.
        :param positive: When set to ``True``, forces the coefficients to be positive.
        :param random_state: Seed of the pseudo random number generator that selects
            a random feature to update. Used when ``selection`` == 'random'.
        :param selection: {'cyclic', 'random'}. If set to 'random', a random
            coefficient is updated every iteration rather than looping over
            features sequentially.
        """
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.normalize = normalize
        self.precompute = precompute
        self.copy_x = copy_x
        self.max_iter = max_iter
        self.tol = tol
        self.warm_start = warm_start
        self.positive = positive
        self.random_state = random_state
        self.selection = selection
def setUp(self):
    """Assemble a hierarchical cache parameter space from component subgrids.

    Builds a root space that selects the cache implementation, then joins the
    implementation-specific subgrids onto it; the associative-cache subgrid in
    turn gets the hash-function and bucket-implementation subgrids joined on.
    """
    self.cache_param_space = SimpleHypergrid(
        name='cache_param_space',
        dimensions=[
            CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
        ]
    )
    self.lru_cache_param_space = SimpleHypergrid(
        name='lru_cache_config',
        dimensions=[
            DiscreteDimension(name='size', min=1, max=2**20),
            OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red'])
        ]
    )
    self.associative_cache_implementation_root_param_space = SimpleHypergrid(
        name='associative_cache_config',
        dimensions=[
            CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
            CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list'])
        ]
    )
    self.mod_prime_hash_function_param_space = SimpleHypergrid(
        name='mod_prime_hash_function',
        dimensions=[
            OrdinalDimension(name='prime', ordered_values=[1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59])
        ]
    )
    self.lowest_bits_param_space = SimpleHypergrid(
        name='lowest_bits',
        dimensions=[
            DiscreteDimension(name='num_bits', min=1, max=64)
        ]
    )
    self.binary_search_tree_param_space = SimpleHypergrid(
        name='binary_search_tree',
        dimensions=[
            DiscreteDimension(name='max_depth', min=1, max=2**10)
        ]
    )
    self.linked_list_param_space = SimpleHypergrid(
        name='linked_list',
        dimensions=[
            DiscreteDimension(name='max_length', min=1, max=2**10)
        ]
    )
    self.associative_cache_implementation_param_space = self.associative_cache_implementation_root_param_space.join(
        subgrid=self.mod_prime_hash_function_param_space,
        on_external_dimension=CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function'])
    ).join(
        subgrid=self.lowest_bits_param_space,
        # BUG FIX: was values='lowest_bits' (a bare string). Every sibling join
        # passes a list of category values; a bare string would presumably be
        # iterated character-by-character, so the join condition could never
        # match the 'lowest_bits' category.
        on_external_dimension=CategoricalDimension(name='hash_function_name', values=['lowest_bits'])
    ).join(
        subgrid=self.binary_search_tree_param_space,
        on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['binary_search_tree'])
    )
    self.cache_param_space = self.cache_param_space.join(
        subgrid=self.lru_cache_param_space,
        on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['lru_cache'])
    ).join(
        subgrid=self.associative_cache_implementation_param_space,
        on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
    ).join(
        # The linked_list subgrid keys off a dimension nested inside the
        # already-joined associative_cache_config subgrid, hence the dotted name.
        subgrid=self.linked_list_param_space,
        on_external_dimension=CategoricalDimension(name='associative_cache_config.bucket_implementation', values=['linked_list'])
    )
def test_optimization_problem(self):
    """Round-trip an OptimizationProblem through the monitoring-service
    encoder/decoder and verify the decoded problem matches the original."""
    parameter_space = SimpleHypergrid(
        name="test",
        dimensions=[
            ContinuousDimension(name="x", min=0, max=1),
            CategoricalDimension(name="y", values=[1, 2, 3])
        ]
    )
    objective_space = SimpleHypergrid(
        name="z",
        dimensions=[
            ContinuousDimension(name="z", min=0, max=1),
            ContinuousDimension(name="z1", min=-1, max=1)
        ]
    )
    context_space = SimpleHypergrid(
        name="context_space",
        dimensions=[
            ContinuousDimension(name="x_c", min=0, max=1),
            CategoricalDimension(name="y_c", values=[1, 2, 3, 4, 6])
        ]
    )
    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name="z", minimize=True),
            Objective(name="z1", minimize=False)
        ],
        context_space=context_space
    )

    encoded_problem = OptimizerMonitoringServiceEncoder.encode_optimization_problem(optimization_problem)
    decoded_problem = OptimizerMonitoringServiceDecoder.decode_optimization_problem(encoded_problem)

    def assert_spaces_equivalent(decoded_space, original_space, num_samples=1000):
        # Hypergrid equality is checked as mutual containment:
        # A == B iff every point of A is in B and every point of B is in A.
        for _ in range(num_samples):
            assert decoded_space.random() in original_space
            assert original_space.random() in decoded_space

    assert_spaces_equivalent(decoded_problem.parameter_space, parameter_space)
    assert_spaces_equivalent(decoded_problem.objective_space, objective_space)
    assert_spaces_equivalent(decoded_problem.context_space, context_space)
    assert_spaces_equivalent(decoded_problem.feature_space, optimization_problem.feature_space)

    print(decoded_problem.objectives)
    assert len(decoded_problem.objectives) == 2
    assert decoded_problem.objectives[0].name == "z"
    assert decoded_problem.objectives[1].name == "z1"
    assert decoded_problem.objectives[0].minimize
    assert not decoded_problem.objectives[1].minimize
from mlos.Optimizers.RegressionModels.HomogeneousRandomForestConfigStore import homogeneous_random_forest_config_store from mlos.Optimizers.RegressionModels.HomogeneousRandomForestRegressionModel import HomogeneousRandomForestRegressionModel from mlos.Optimizers.RegressionModels.MultiObjectiveHomogeneousRandomForest import MultiObjectiveHomogeneousRandomForest from mlos.Optimizers.RegressionModels.LassoCrossValidatedConfigStore import lasso_cross_validated_config_store from mlos.Optimizers.RegressionModels.MultiObjectiveLassoCrossValidated import MultiObjectiveLassoCrossValidated from mlos.Optimizers.RegressionModels.RegressionEnhancedRandomForestConfigStore import regression_enhanced_random_forest_config_store from mlos.Optimizers.RegressionModels.MultiObjectiveRegressionEnhancedRandomForest import MultiObjectiveRegressionEnhancedRandomForest bayesian_optimizer_config_store = ComponentConfigStore( parameter_space=SimpleHypergrid( name="bayesian_optimizer_config", dimensions=[ CategoricalDimension( name="surrogate_model_implementation", values=[ HomogeneousRandomForestRegressionModel.__name__, MultiObjectiveHomogeneousRandomForest.__name__, MultiObjectiveLassoCrossValidated.__name__, MultiObjectiveRegressionEnhancedRandomForest.__name__ ]), CategoricalDimension(name="experiment_designer_implementation", values=[ExperimentDesigner.__name__]), DiscreteDimension( name="min_samples_required_for_guided_design_of_experiments", min=2, max=100) ]).join( subgrid=homogeneous_random_forest_config_store.parameter_space, on_external_dimension=CategoricalDimension( name="surrogate_model_implementation", values=[ HomogeneousRandomForestRegressionModel.__name__,
"\n" "Dimensions:\n" "- num_iterations: how many optimization iterations to run.\n" "- evaluation_frequency: how often should the evaluator capture the optima and goodness of fit metrics (e.g. every 10 iterations).\n" "- include_pickled_optimizer_in_report: should the state of the optimizer be pickled and saved.\n" "- include_pickled_objective_function_in_report: should the final state of the objective function be pickled and saved.\n" "- report_regression_model_goodness_of_fit: should the goodness of fit metrics be included in the evaluation report.\n" "- report_optima_over_time: should the optima over time be included in the evaluation report.\n" "- include_execution_trace_in_report: should the execution trace produced by mlos.Tracer be included in the evaluation report.", parameter_space=SimpleHypergrid( name="optimizer_evaluator", dimensions=[ DiscreteDimension(name="num_iterations", min=1, max=2**32), DiscreteDimension(name="evaluation_frequency", min=1, max=2**10), CategoricalDimension(name="include_pickled_optimizer_in_report", values=[True, False]), CategoricalDimension(name="include_pickled_objective_function_in_report", values=[True, False]), CategoricalDimension(name="report_regression_model_goodness_of_fit", values=[True, False]), CategoricalDimension(name="report_optima_over_time", values=[True, False]), CategoricalDimension(name="report_pareto_over_time", values=[True, False]), CategoricalDimension(name="report_pareto_volume_over_time", values=[True, False]), CategoricalDimension(name="include_execution_trace_in_report", values=[True, False]), ] ), default=Point( num_iterations=100, evaluation_frequency=10, include_pickled_optimizer_in_report=True, include_pickled_objective_function_in_report=True, report_regression_model_goodness_of_fit=True, report_optima_over_time=True,
parameter_space=SimpleHypergrid( name="homogeneous_random_forest_regression_model_config", dimensions=[ DiscreteDimension(name="n_estimators", min=1, max=10000), ContinuousDimension(name="features_fraction_per_estimator", min=0, max=1, include_min=False, include_max=True), ContinuousDimension(name="samples_fraction_per_estimator", min=0, max=1, include_min=False, include_max=True), CategoricalDimension(name="regressor_implementation", values=[DecisionTreeRegressionModel.__name__ ]), CategoricalDimension(name="bootstrap", values=[True, False]) ]).join(subgrid=decision_tree_config_store.parameter_space, on_external_dimension=CategoricalDimension( name="regressor_implementation", values=[DecisionTreeRegressionModel.__name__])), default=Point( n_estimators=10, features_fraction_per_estimator=1, samples_fraction_per_estimator=1, regressor_implementation=DecisionTreeRegressionModel.__name__, decision_tree_regression_model_config=decision_tree_config_store. default, bootstrap=True), description="TODO")
import json
import os
import sys

# Make the MLOS Python sources importable before pulling in any mlos modules.
mlos_root_path = os.environ['MLOS_ROOT']
mlos_python_root_path = os.path.join(mlos_root_path, 'Source', 'Mlos.Python')
sys.path.append(mlos_python_root_path)

from mlos.Spaces import SimpleHypergrid, EmptyDimension, CategoricalDimension, ContinuousDimension, DiscreteDimension, OrdinalDimension
from mlos.Spaces.HypergridsJsonEncoderDecoder import HypergridJsonEncoder, HypergridJsonDecoder

# One dimension of each kind, all covering the values 1 through 10.
continuous = ContinuousDimension(name='continuous', min=1, max=10)
discrete = DiscreteDimension(name='discrete', min=1, max=10)
ordinal = OrdinalDimension(name='ordinal', ordered_values=list(range(1, 11)))
categorical = CategoricalDimension(name='categorical', values=list(range(1, 11)))

simple_hypergrid = SimpleHypergrid(
    name='all_kinds_of_dimensions',
    dimensions=[continuous, discrete, ordinal, categorical]
)

# Serialize the hypergrid with the custom encoder (JSON string used downstream).
py_simple_hypergrid_json_string = json.dumps(simple_hypergrid, cls=HypergridJsonEncoder)