def decode_simple_hypergrid(hypergrid: OptimizerService_pb2.SimpleHypergrid) -> SimpleHypergrid:
    """Deserialize a protobuf SimpleHypergrid message into a SimpleHypergrid object.

    Decodes the top-level dimensions first, then re-attaches each guest subgrid
    by joining it back on its pivot (join) dimension.

    :param hypergrid: the protobuf message to decode.
    :return: the reconstructed SimpleHypergrid.
    """
    assert isinstance(hypergrid, OptimizerService_pb2.SimpleHypergrid)

    decoded = SimpleHypergrid(
        name=hypergrid.Name,
        dimensions=[
            OptimizerServiceDecoder.decode_dimension(encoded_dimension)
            for encoded_dimension in hypergrid.Dimensions
        ],
    )

    for encoded_subgrid in hypergrid.GuestSubgrids:
        guest = OptimizerServiceDecoder.decode_subgrid(encoded_subgrid)
        # join() attaches the subgrid to `decoded` in place.
        decoded.join(
            subgrid=guest.subgrid,
            on_external_dimension=guest.join_dimension,
        )

    return decoded
class TestHierarchicalHypergrid3(unittest.TestCase):
    """ Tests the join on external dimension in hypergrids.

    In particular:
    * Hypergrid.join(subgrid, on_external_dimension=SomeDimension(...)) should:
        * Check if the dimension.name contains a subgrid name:
            * if yes - drop the prefix and call dimension_subgrid.join(subgrid, on_external_dimension)
            * otherwise we are joining here so:
                * if not dimension.intersects(self[dimension.name]): return self
                * self.joined_subgrids_by_pivot_dimension[dimension.name] = JoinedHypergrid(dimension, subgrid)
    * Randomly generating points from the supergrid should generate points from the newly joined subgrid
    * Point containment should work
    * Hypergrid containment should work (eventually)
    """

    def setUp(self):
        # Root space: selects which cache implementation's subgrid applies.
        self.cache_param_space = SimpleHypergrid(
            name='cache_param_space',
            dimensions=[
                CategoricalDimension(name='cache_implementation_name', values=['lru_cache', 'associative_cache'])
            ]
        )

        self.lru_cache_param_space = SimpleHypergrid(
            name='lru_cache_config',
            dimensions=[
                DiscreteDimension(name='size', min=1, max=2**20),
                OrdinalDimension(name='color', ordered_values=['green', 'orange', 'red'])
            ]
        )

        self.associative_cache_implementation_root_param_space = SimpleHypergrid(
            name='associative_cache_config',
            dimensions=[
                CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function', 'lowest_bits']),
                CategoricalDimension(name='bucket_implementation', values=['single_value', 'binary_search_tree', 'linked_list'])
            ]
        )

        self.mod_prime_hash_function_param_space = SimpleHypergrid(
            name='mod_prime_hash_function',
            dimensions=[
                OrdinalDimension(name='prime', ordered_values=[1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59])
            ]
        )

        self.lowest_bits_param_space = SimpleHypergrid(
            name='lowest_bits',
            dimensions=[
                DiscreteDimension(name='num_bits', min=1, max=64)
            ]
        )

        self.binary_search_tree_param_space = SimpleHypergrid(
            name='binary_search_tree',
            dimensions=[
                DiscreteDimension(name='max_depth', min=1, max=2**10)
            ]
        )

        self.linked_list_param_space = SimpleHypergrid(
            name='linked_list',
            dimensions=[
                DiscreteDimension(name='max_length', min=1, max=2**10)
            ]
        )

        self.associative_cache_implementation_param_space = self.associative_cache_implementation_root_param_space.join(
            subgrid=self.mod_prime_hash_function_param_space,
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['mod_prime_hash_function'])
        ).join(
            subgrid=self.lowest_bits_param_space,
            # BUG FIX: `values` must be a list. The original passed the bare string
            # 'lowest_bits', which a CategoricalDimension would treat as an iterable
            # of single characters rather than a single category value.
            on_external_dimension=CategoricalDimension(name='hash_function_name', values=['lowest_bits'])
        ).join(
            subgrid=self.binary_search_tree_param_space,
            on_external_dimension=CategoricalDimension(name='bucket_implementation', values=['binary_search_tree'])
        )

        # The last join pivots on a dotted dimension name, exercising the
        # "drop the prefix and join inside the subgrid" path from the class docstring.
        self.cache_param_space = self.cache_param_space.join(
            subgrid=self.lru_cache_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['lru_cache'])
        ).join(
            subgrid=self.associative_cache_implementation_param_space,
            on_external_dimension=CategoricalDimension(name='cache_implementation_name', values=['associative_cache'])
        ).join(
            subgrid=self.linked_list_param_space,
            on_external_dimension=CategoricalDimension(name='associative_cache_config.bucket_implementation', values=['linked_list'])
        )

    def test_external_dimension_join(self):
        """Every random point drawn from the joined supergrid must be contained in it."""
        for _ in range(10):
            print("################################################")
            random_config = self.cache_param_space.random()
            for param_name, value in random_config:
                print(param_name, value)
            print(random_config in self.cache_param_space)
            # BUG FIX: the original only printed the containment check, so the test
            # could never fail. Assert it, per the class docstring's contract.
            self.assertTrue(random_config in self.cache_param_space)
            print("################################################")
class TestHierarchicalSpaces(unittest.TestCase):
    """Tests point containment, random generation, and reseeding in hierarchical hypergrids."""

    def setUp(self):
        self.emergency_buffer_settings = SimpleHypergrid(
            name='emergency_buffer_config',
            dimensions=[
                DiscreteDimension(name='log2_emergency_buffer_size', min=0, max=16),
                CategoricalDimension(name='use_colors', values=[True, False])
            ])

        self.emergency_buffer_color = SimpleHypergrid(
            name='emergency_buffer_color',
            dimensions=[
                CategoricalDimension(name='color', values=['Maroon', 'Crimson', 'Tanager'])
            ])

        self.emergency_buffer_settings_with_color = self.emergency_buffer_settings.join(
            subgrid=self.emergency_buffer_color,
            on_external_dimension=CategoricalDimension(name='use_colors', values=[True]))

        self.hierarchical_settings = SimpleHypergrid(
            name='communication_channel_config',
            dimensions=[
                DiscreteDimension(name='num_readers', min=1, max=64),
                DiscreteDimension(name='log2_buffer_size', min=10, max=24),
                CategoricalDimension(name='use_emergency_buffer', values=[True, False])
            ]).join(
                subgrid=self.emergency_buffer_settings_with_color,
                on_external_dimension=CategoricalDimension(name='use_emergency_buffer', values=[True]))

    def test_composite_spaces(self):
        # Uses assertIn/assertNotIn (instead of assertTrue(x in y)) for clearer
        # failure messages; the membership semantics are identical.
        valid_config_no_emergency_buffer = Point(num_readers=1, log2_buffer_size=10, use_emergency_buffer=False)
        self.assertIn(valid_config_no_emergency_buffer, self.hierarchical_settings)

        valid_emergency_buffer_config = Point(log2_emergency_buffer_size=2, use_colors=False)
        self.assertIn(valid_emergency_buffer_config, self.emergency_buffer_settings)

        valid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            emergency_buffer_config=valid_emergency_buffer_config)
        self.assertIn(valid_config_with_emergency_buffer, self.hierarchical_settings)

        # The subgrid only contains the point once the pivot dimension value is supplied.
        valid_emergency_buffer_color_config = Point(color='Crimson')
        valid_emergency_buffer_color_config_with_pivot_dimension = valid_emergency_buffer_color_config.copy()
        valid_emergency_buffer_color_config_with_pivot_dimension['use_colors'] = True
        self.assertIn(valid_emergency_buffer_color_config_with_pivot_dimension, self.emergency_buffer_color)

        valid_colorful_emergency_buffer_config = Point(
            log2_emergency_buffer_size=2,
            use_colors=True,
            emergency_buffer_color=valid_emergency_buffer_color_config)
        valid_colorful_emergency_buffer_config_with_pivot_dimension = valid_colorful_emergency_buffer_config.copy()
        valid_colorful_emergency_buffer_config_with_pivot_dimension['use_emergency_buffer'] = True
        self.assertIn(
            valid_colorful_emergency_buffer_config_with_pivot_dimension,
            self.emergency_buffer_settings_with_color)

        valid_config_with_emergency_buffer_colors = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            emergency_buffer_config=valid_colorful_emergency_buffer_config)
        self.assertIn(valid_config_with_emergency_buffer_colors, self.hierarchical_settings)

        # Redundant coordinates for a disabled subgrid must not break containment.
        valid_config_with_emergency_buffer_and_redundant_coordinates = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=False,
            log2_emergency_buffer_size=2)
        self.assertIn(
            valid_config_with_emergency_buffer_and_redundant_coordinates,
            self.hierarchical_settings)

        # Invalid: emergency buffer enabled but the required subgrid point is missing.
        another_invalid_config_with_emergency_buffer = Point(
            num_readers=1, log2_buffer_size=10, use_emergency_buffer=True)
        self.assertNotIn(another_invalid_config_with_emergency_buffer, self.hierarchical_settings)

        # Invalid: log2_emergency_buffer_size=40 is outside the [0, 16] dimension range.
        yet_another_invalid_config_with_emergency_buffer = Point(
            num_readers=1,
            log2_buffer_size=10,
            use_emergency_buffer=True,
            log2_emergency_buffer_size=40)
        self.assertNotIn(yet_another_invalid_config_with_emergency_buffer, self.hierarchical_settings)

        # NOTE(review): the original re-asserted several already-asserted configs
        # (valid_config_no_emergency_buffer three times); the copy-paste duplicates
        # were dropped — every distinct containment check above is preserved.

    def test_generating_random_configs(self):
        used_emergency_buffer = False
        used_color = False
        used_crimson = False

        # Let's seed it to make sure we get consistent test results
        random_state = random.Random()
        random_state.seed(1)
        self.hierarchical_settings.random_state = random_state

        for _ in range(100):
            random_config = self.hierarchical_settings.random()
            self.assertIn(random_config, self.hierarchical_settings)
            used_emergency_buffer = used_emergency_buffer or random_config['use_emergency_buffer']
            if random_config['use_emergency_buffer']:
                used_color = used_color or random_config['emergency_buffer_config']['use_colors']
                if random_config['emergency_buffer_config']['use_colors']:
                    used_crimson = used_crimson or (
                        random_config['emergency_buffer_config']['emergency_buffer_color']['color'] == 'Crimson')

        # With 100 seeded draws, each branch of the hierarchy should have been visited.
        self.assertTrue(used_emergency_buffer)
        self.assertTrue(used_color)
        self.assertTrue(used_crimson)

    def test_reseeding_random_state(self):
        previous_iteration_first_pass_points = None

        for i in range(10):
            # let's seed the grid for the first time
            random_state = random.Random()
            random_state.seed(i)
            self.hierarchical_settings.random_state = random_state
            first_pass_points = [self.hierarchical_settings.random() for _ in range(100)]

            # let's do it again - same seed must reproduce the same sequence
            random_state = random.Random()
            random_state.seed(i)
            self.hierarchical_settings.random_state = random_state
            second_pass_points = [self.hierarchical_settings.random() for _ in range(100)]

            for first_pass_point, second_pass_point in zip(first_pass_points, second_pass_points):
                self.assertTrue(first_pass_point == second_pass_point)

            if previous_iteration_first_pass_points is not None:
                # Let's make sure we keep changing the points across seeds
                self.assertTrue(
                    any(previous != current
                        for previous, current in zip(previous_iteration_first_pass_points, first_pass_points)))
            previous_iteration_first_pass_points = first_pass_points
class OptimizationProblem:
    """Models an instance of an optimization problem.

    An instance of OptimizationProblem can be used to create a variety of optimizers
    and instantly enlighten them to what they are working with.

    Many optimization problems contain the same set of elements:

    1. Decision Variables / Search Space - decision variables characterized by their allowed
       ranges and constraints form a Search Space.
    2. Objectives - one or more values to optimize. Each objective is meant to be either
       maximized or minimized.
    3. Context - this represents either:
        1. controlled variables in an active learning scenarios, or
        2. context information in an online learning scenario.

    For example if we are attempting to optimize a smart cache:

    Decision variables:
        * cache implementation (array, hashmap), each implementation's parameters:
            * array: size, associativity, eviction policy
            * hashmap: size, hash function, bucket data structure, bucket size,
              bucket eviction policy

    Objectives:
        * latency
        * cache memory footprint
        * recomputation cost (averge, median, total)
        * hit ratio
        * cache utilization

    Context:
        * workload characteristics:
            * true working set size (only known in active learning scenario)
            * estimated working set size (possibly many estimators, many Confidence
              Interval sizes)
            * recomputation cost distribution (true or estimated)
        * deployment context:
            * machine characteristics:
                * num cores
                * amount of ram
                * disk type
            * runtime state:
                * cpu utilization
                * ram utilization
                * etc

    Parameters
    ----------
    parameter_space : Hypergrid
        Input parameter space for objective, i.e. the search space.
    objective_space : Hypergrid
        Output space for the objective, can be (-inf, +inf)
    objectives : list[Objective]
        Objective function(s) to optimize, with input from parameter_space and
        output in objective_space.
    context_space : Hypergrid, default=None
        Additional run-time context features.

    Attributes
    ----------
    feature_space : Hypergrid
        Joint space of parameters and context.
    """

    # The dimensions that we inject to keep track of individual subspaces, but which are worthless
    # for modeling purposes.
    META_DIMENSION_NAMES = {"contains_parameters", "contains_context", "contains_objectives"}

    def __init__(
            self,
            parameter_space: Hypergrid,
            objective_space: Hypergrid,
            objectives: List[Objective],
            context_space: Hypergrid = None,
    ):
        self.parameter_space = parameter_space
        self.context_space = context_space

        assert not any(
            isinstance(dimension, CategoricalDimension)
            for dimension in objective_space.dimensions
        ), "Objective dimension cannot be Categorical."
        objective_dimension_names = {dimension.name for dimension in objective_space.dimensions}
        assert all(
            objective.name in objective_dimension_names for objective in objectives
        ), "All objectives must belong to objective space."
        self.objective_space = objective_space

        # We need to keep track of which objective to minimize, and which one to maximize.
        self.objectives = objectives
        self.objective_names = [objective.name for objective in self.objectives]

        # Fit functions / surrogate models will be fed features consisting of both context
        # and parameters. Thus, the feature space is comprised of both context and parameters.
        has_context = self.context_space is not None
        self.feature_space = SimpleHypergrid(
            name="features",
            dimensions=[
                CategoricalDimension(name="contains_context", values=[has_context])
            ]
        ).join(
            subgrid=self.parameter_space,
            on_external_dimension=CategoricalDimension(name="contains_context", values=[has_context])
        )
        if has_context:
            self.feature_space = self.feature_space.join(
                subgrid=self.context_space,
                on_external_dimension=CategoricalDimension(name="contains_context", values=[True])
            )

    def construct_feature_dataframe(self,
                                    parameters_df: pd.DataFrame,
                                    context_df: pd.DataFrame = None,
                                    product: bool = False) -> pd.DataFrame:
        """Construct a feature value dataframe from parameter and context dataframes.

        Column names are prefixed with their subspace name so they match the
        dimension names of the hierarchical feature_space. If ``product`` is True,
        creates a cartesian product of parameters and context rows; otherwise the
        context columns are appended side-by-side (requiring equal lengths).

        :raises ValueError: if context is required but missing, or (when product
            is False) the parameter and context dataframes have different lengths.
        """
        if (self.context_space is not None) and (context_df is None):
            raise ValueError("Context required by optimization problem but not provided.")

        # prefix column names to adhere to dimensions in hierarchical hypergrid
        features_df = parameters_df.rename(lambda x: f"{self.parameter_space.name}.{x}", axis=1)

        if context_df is not None and len(context_df) > 0:
            renamed_context_values = context_df.rename(lambda x: f"{self.context_space.name}.{x}", axis=1)
            features_df['contains_context'] = True
            if product:
                # An outer merge on the constant 'contains_context' column yields
                # the cartesian product of parameter and context rows.
                renamed_context_values['contains_context'] = True
                features_df = features_df.merge(renamed_context_values, how='outer', on='contains_context')
                features_df.index = parameters_df.index.copy()
            else:
                if len(parameters_df) != len(context_df):
                    raise ValueError(
                        f"Incompatible shape of parameters and context: "
                        f"{parameters_df.shape} and {context_df.shape}.")
                features_df = pd.concat([features_df, renamed_context_values], axis=1)
        else:
            features_df['contains_context'] = False
        return features_df

    def deconstruct_feature_dataframe(self, features_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Splits the feature dataframe back into parameters and context dataframes.

        This is a workaround. What we should really do is implement this
        functionality as a proper operator on Hypergrids.

        :return: (parameters_df, context_df); context_df is None when the problem
            has no context space.
        """
        parameter_column_names_mapping = {
            f"{self.parameter_space.name}.{dimension_name}": dimension_name
            for dimension_name in self.parameter_space.dimension_names
        }
        existing_parameter_names = [
            parameter_name
            for parameter_name in parameter_column_names_mapping
            if parameter_name in features_df.columns
        ]
        # BUG FIX: the original called .rename(..., inplace=True) on a column slice
        # of features_df, a pandas anti-pattern that triggers SettingWithCopyWarning
        # and may alias the source frame. Chaining .rename() returns a fresh frame.
        parameters_df = features_df[existing_parameter_names].rename(columns=parameter_column_names_mapping)

        if self.context_space is not None:
            context_column_names_mapping = {
                f"{self.context_space.name}.{dimension_name}": dimension_name
                for dimension_name in self.context_space.dimension_names
            }
            existing_context_column_names = [
                column_name
                for column_name in context_column_names_mapping
                if column_name in features_df.columns
            ]
            context_df = features_df[existing_context_column_names].rename(columns=context_column_names_mapping)
        else:
            context_df = None

        return parameters_df, context_df

    def to_dict(self) -> dict:
        """Return a dict representation suitable for serialization."""
        return {
            "parameter_space": self.parameter_space,
            "context_space": self.context_space,
            "objective_space": self.objective_space,
            "objectives": [objective_to_dict(objective) for objective in self.objectives]
        }
class NestedPolynomialObjective(ObjectiveFunctionBase):
    """A hierarchical function with multiple nested polynomials.

    The value of num_nested_polynomials controls how many polynomials are created.
    All polynomials are created according to (nearly identical) configs specified by
    the polynomial_objective_config value. The configs for each polynomial differ
    only in their random seeds.

    The idea here is to provide a more general version of ThreeLevelQuadratic. In
    ThreeLevelQuadratic we have three two-dimensional, degree two polynomials, and
    we select between them using the "vertex_height" parameter. Here we have
    num_nested_polynomials functions, with configurable dimensions, degrees, and
    coefficient of variation.

    Optimizing this synthetic function is analogous to optimizing a component with
    multiple mutually-exclusive implementations.
    """

    def __init__(self, objective_function_config: Point):
        assert objective_function_config.polynomial_objective_config in PolynomialObjective.CONFIG_SPACE
        ObjectiveFunctionBase.__init__(self, objective_function_config)

        # Root space: polynomial_id selects which nested polynomial evaluates a point.
        # FIX: list(range(...)) instead of [id for id in range(...)], which shadowed
        # the builtin id().
        self._parameter_space = SimpleHypergrid(
            name="domain",
            dimensions=[
                CategoricalDimension(
                    name="polynomial_id",
                    values=list(range(self.objective_function_config.num_nested_polynomials))
                )
            ]
        )

        polynomial_objective_config = self.objective_function_config.polynomial_objective_config
        self._polynomial_objective_config = polynomial_objective_config
        self._polynomials = []

        # Let's create the required number of polynomials, each attached under its
        # own domain_{i} subgrid.
        for i in range(self.objective_function_config.num_nested_polynomials):
            # Change the seed so that it's still effective but also reproducible.
            # NOTE(review): this mutates the shared config, so seeds accumulate
            # (+1, +2, ...) across iterations - deterministic, but each polynomial's
            # seed depends on its position in the loop.
            polynomial_objective_config.seed += i + 1
            polynomial = PolynomialObjectiveWrapper(polynomial_objective_config, domain_name=f"domain_{i}")
            self._polynomials.append(polynomial)
            # join() attaches the subgrid to self._parameter_space in place.
            self._parameter_space.join(
                subgrid=polynomial.parameter_space,
                on_external_dimension=CategoricalDimension(name="polynomial_id", values=[i])
            )

        self._output_space = SimpleHypergrid(
            name='output_space',
            dimensions=[
                ContinuousDimension(name='y', min=-math.inf, max=math.inf)
            ]
        )

    @property
    def parameter_space(self) -> Hypergrid:
        return self._parameter_space

    @property
    def output_space(self) -> Hypergrid:
        return self._output_space

    def evaluate_point(self, point: Point) -> Point:
        """Dispatch the point to the polynomial selected by its polynomial_id."""
        selected_polynomial = self._polynomials[point.polynomial_id]
        return selected_polynomial.evaluate_point(point[f"domain_{point.polynomial_id}"])

    def evaluate_dataframe(self, dataframe: pd.DataFrame) -> pd.DataFrame:
        """Evaluate every row of the dataframe, returning a single-column 'y' frame.

        For now this evaluates row by row.
        """
        # BUG FIX: use positional indexing (iloc). The original used
        # dataframe.loc[[i]] with i in range(len(...)), which silently assumes the
        # index is the default RangeIndex 0..n-1 and raises KeyError (or picks the
        # wrong row) for any other index.
        values = []
        for position in range(len(dataframe.index)):
            row = dataframe.iloc[[position]]
            point = Point.from_dataframe(row)
            values.append(self.evaluate_point(point).y)
        return pd.DataFrame({'y': values})

    def get_context(self) -> Point:
        """ Returns the config used to create the polynomial.

        Down the road it could return some more info about the resulting polynomial.

        :return:
        """
        return self._polynomial_objective_config