def test_pareto_frontier_volume_simple(self):
    """A simple sanity test on the pareto frontier volume computations."""

    # Let's generate a pareto frontier in 2D. All points lie on the line y = 1 - x.
    #
    x = np.linspace(start=0, stop=1, num=100)
    y = 1 - x
    pareto_df = pd.DataFrame({'x': x, 'y': y})

    optimization_problem = OptimizationProblem(
        parameter_space=None,
        objective_space=SimpleHypergrid(
            name='objectives',
            dimensions=[
                ContinuousDimension(name='x', min=0, max=1),
                ContinuousDimension(name='y', min=0, max=1)
            ]
        ),
        objectives=[
            Objective(name='x', minimize=False),
            Objective(name='y', minimize=False)
        ]
    )

    pareto_frontier = ParetoFrontier(optimization_problem, pareto_df)
    pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
    lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)
    print(lower_bound, upper_bound)
    assert 0.49 < lower_bound < upper_bound < 0.51
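# Why 0.5? The region of the unit square dominated by the frontier y = 1 - x (both objectives maximized)
# is the triangle under that line, whose area is 1/2. The num_samples argument suggests a sampling-based
# estimate; the standalone sketch below illustrates that Monte Carlo idea and is NOT the library's
# implementation: a sample (sx, sy) is dominated by some frontier point iff sx + sy <= 1.
import numpy as np

rng = np.random.default_rng(seed=42)
samples = rng.uniform(low=0.0, high=1.0, size=(1_000_000, 2))
dominated_fraction = (samples.sum(axis=1) <= 1.0).mean()
print(dominated_fraction)  # ~0.5, matching the asserted confidence interval (0.49, 0.51)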
def test_basic_functionality_on_2d_objective_space(self):
    """Basic sanity check. Mainly used to help us develop the API."""

    # Let's just create a bunch of random points, build a pareto frontier
    # and verify that the invariants hold.
    #
    parameter_space = SimpleHypergrid(
        name='params',
        dimensions=[
            ContinuousDimension(name='x1', min=0, max=10)
        ]
    )

    objective_space = SimpleHypergrid(
        name='objectives',
        dimensions=[
            ContinuousDimension(name='y1', min=0, max=10),
            ContinuousDimension(name='y2', min=0, max=10)
        ]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[
            Objective(name='y1', minimize=False),
            Objective(name='y2', minimize=False)
        ]
    )

    num_rows = 100000
    random_objectives_df = objective_space.random_dataframe(num_rows)

    # They don't match but they don't need to for this test.
    #
    random_params_df = parameter_space.random_dataframe(num_rows)

    pareto_frontier = ParetoFrontier(
        optimization_problem=optimization_problem,
        objectives_df=random_objectives_df,
        parameters_df=random_params_df
    )
    pareto_df = pareto_frontier.pareto_df

    non_pareto_index = random_objectives_df.index.difference(pareto_df.index)
    for i, row in pareto_df.iterrows():
        # Now let's make sure that no point in pareto is dominated by any non-pareto point.
        #
        assert (random_objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

        # Let's also make sure that no point on the pareto is dominated by any other point there.
        #
        other_rows = pareto_df.index.difference([i])
        assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
def setup_class(cls):
    """Sets up all the objects needed to test the UtilityFunctionOptimizers.

    To test the UtilityFunctionOptimizers we need to first construct:
        * an objective function for the model to approximate and its corresponding parameter and output spaces
        * an optimization problem
        * a regression model, then train it on some random parameters to the objective function
        * a utility function that utilizes the model
        * a pareto frontier over the random parameters

    And only then do we get to test our utility function optimizers.

    This is a lot of work and a somewhat cleaner approach would be to simply create an instance of the BayesianOptimizer
    to do all of the above for us, but then we might not be able to test the utility function optimizers as thoroughly
    as we need to.

    :return:
    """
    global_values.declare_singletons()
    global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)
    cls.logger = create_logger("TestUtilityFunctionOptimizers")

    cls.model_config = multi_objective_pass_through_model_config_store.default
    cls.model = MultiObjectivePassThroughModelForTesting(
        model_config=cls.model_config,
        logger=cls.logger
    )
    cls.objective_function = cls.model.objective_function
    cls.parameter_space = cls.objective_function.parameter_space
    cls.objective_space = cls.objective_function.output_space

    cls.optimization_problem = cls.objective_function.default_optimization_problem

    cls.utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
    cls.utility_function = ConfidenceBoundUtilityFunction(
        function_config=cls.utility_function_config,
        surrogate_model=cls.model,
        minimize=cls.optimization_problem.objectives[0].minimize,
        logger=cls.logger
    )

    # To make the pareto frontier we have to generate some random points.
    #
    cls.parameters_df = cls.objective_function.parameter_space.random_dataframe(1000)
    cls.objectives_df = cls.objective_function.evaluate_dataframe(cls.parameters_df)
    cls.pareto_frontier = ParetoFrontier(
        optimization_problem=cls.optimization_problem,
        objectives_df=cls.objectives_df,
        parameters_df=cls.parameters_df
    )
def test_pareto_shape(self, function_config_name):
    """Tests if the pareto frontier has the expected shape.

    For no phase difference, we would expect the pareto frontier to be a single point.
    For a phase difference of pi / 2 we would expect the pareto frontier to be on a quarter circle.
    For a phase difference of pi we would expect the pareto frontier to be on a diagonal.
    """
    function_config = multi_objective_enveloped_waves_config_store.get_config_by_name(function_config_name)
    objective_function = MultiObjectiveEnvelopedWaves(function_config)
    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name=dim_name, minimize=False) for dim_name in objective_function.output_space.dimension_names]
    )

    # Let's create a meshgrid of all params.
    # TODO: add this as a function in Hypergrids
    #
    num_points = 100 if function_config_name != "pi_phase_difference" else 10
    linspaces = [dimension.linspace(num_points) for dimension in objective_function.parameter_space.dimensions]
    meshgrids = np.meshgrid(*linspaces)
    flat_meshgrids = [meshgrid.flatten() for meshgrid in meshgrids]
    params_df = pd.DataFrame({
        dim_name: flat_meshgrid
        for dim_name, flat_meshgrid
        in zip(objective_function.parameter_space.dimension_names, flat_meshgrids)
    })
    objectives_df = objective_function.evaluate_dataframe(params_df)
    pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem, objectives_df=objectives_df, parameters_df=params_df)
    pareto_df = pareto_frontier.pareto_df

    if function_config_name == "no_phase_difference":
        # Let's assert that the optimum is close to 3 and that all selected params are close to half of pi.
        #
        assert len(pareto_df.index) == 1
        for objective in optimization_problem.objectives:
            assert abs(pareto_df[objective.name].iloc[0] - 3) < 0.001

        optimal_params_df = params_df.iloc[pareto_df.index]
        for param_name in objective_function.parameter_space.dimension_names:
            assert abs(optimal_params_df[param_name].iloc[0] - math.pi / 2) < 0.02

    if function_config_name == "half_pi_phase_difference":
        expected_radius = 3
        pareto_df['radius'] = np.sqrt(pareto_df['y0'] ** 2 + pareto_df['y1'] ** 2)
        pareto_df['error'] = pareto_df['radius'] - expected_radius
        assert (np.abs(pareto_df['error']) < 0.01).all()

    if function_config_name == "pi_phase_difference":
        # We expect that the absolute values of our objectives will be nearly identical.
        #
        assert (np.abs(pareto_df['y0'] + pareto_df['y1']) < 0.01).all()
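# A hedged illustration of why the "half_pi_phase_difference" frontier should lie on a circle of the
# expected radius. This assumes the two objectives reduce (up to an amplitude A) to y0 = A*sin(x) and
# y1 = A*sin(x + pi/2) = A*cos(x); the actual MultiObjectiveEnvelopedWaves config may differ in details
# (e.g. how many waves are summed). A = 3 mirrors expected_radius in the test above.
import numpy as np

A = 3.0
x = np.linspace(0, 2 * np.pi, 1000)
y0 = A * np.sin(x)
y1 = A * np.sin(x + np.pi / 2)       # == A * cos(x)
radius = np.sqrt(y0 ** 2 + y1 ** 2)  # identically A, so the non-dominated points trace a quarter circle
assert np.allclose(radius, A)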
def test_repeated_values(self):
    """Validates that the algorithm does its job in the presence of repeated values.

    :return:
    """
    optimization_problem = OptimizationProblem(
        parameter_space=None,
        objective_space=SimpleHypergrid(
            name="objectives",
            dimensions=[
                ContinuousDimension(name='y1', min=0, max=5),
                ContinuousDimension(name='y2', min=0, max=5)
            ]
        ),
        objectives=[
            Objective(name='y1', minimize=False),
            Objective(name='y2', minimize=False)
        ]
    )

    expected_pareto_df = pd.DataFrame(
        [
            [1, 2],
            [1, 2],
            [2, 1],
            [0.5, 2],
            [1, 1],
            [2, 0.5]
        ],
        columns=['y1', 'y2']
    )

    dominated_df = pd.DataFrame(
        [
            [0.5, 0.5],
            [0.5, 1],
            [0.5, 1.5],
            [1, 0.5],
            [1.5, 0.5]
        ],
        columns=['y1', 'y2']
    )

    all_objectives_df = pd.concat([dominated_df, expected_pareto_df])
    pareto_frontier = ParetoFrontier(
        optimization_problem,
        objectives_df=all_objectives_df,
        parameters_df=pd.DataFrame(index=all_objectives_df.index)
    )
    computed_pareto_df = pareto_frontier.pareto_df
    assert computed_pareto_df.sort_values(by=['y1', 'y2']).equals(expected_pareto_df.sort_values(by=['y1', 'y2']))
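# A minimal helper (for illustration only, not the ParetoFrontier implementation) that applies the
# strict-domination rule this test relies on: a point is removed only if some other point is strictly
# greater in *every* objective. Under that rule [1, 1] and [0.5, 2] survive, because [1, 2] ties them
# on one objective, and exact duplicates such as the two [1, 2] rows never eliminate each other.
import pandas as pd

def strict_pareto(objectives_df: pd.DataFrame) -> pd.DataFrame:
    # For each row, check whether any other row is strictly greater in all columns.
    dominated = objectives_df.apply(
        lambda row: (objectives_df > row).all(axis=1).any(),
        axis=1
    )
    return objectives_df[~dominated.to_numpy()]

# Example: strict_pareto(all_objectives_df) keeps exactly the rows of expected_pareto_df above.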
def _prepare_dummy_model_based_test_artifacts(self, dummy_model_config, logger):
    """Prepares all the artifacts we need to create and run a utility function optimizer.

    I chose to create them here rather than in setup_class to avoid unnecessarily creating all possible combinations
    for all possible tests. It's easier and cheaper to produce these artifacts just in time, rather than upfront.
    I suspect that pytest has functionality to accomplish just this, but I haven't found it yet.

    We need to produce:
        * an optimization problem
        * a model
        * a utility function
        * a pareto frontier
    """
    model = MultiObjectivePassThroughModelForTesting(model_config=dummy_model_config, logger=logger)
    objective_function = model.objective_function
    optimization_problem = objective_function.default_optimization_problem

    # Let's create the pareto frontier.
    #
    params_df = objective_function.parameter_space.random_dataframe(1000)
    objectives_df = objective_function.evaluate_dataframe(params_df)
    pareto_frontier = ParetoFrontier(
        optimization_problem=optimization_problem,
        objectives_df=objectives_df,
        parameters_df=params_df
    )

    if len(optimization_problem.objectives) == 1:
        utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
        utility_function = ConfidenceBoundUtilityFunction(
            function_config=utility_function_config,
            surrogate_model=model,
            minimize=optimization_problem.objectives[0].minimize,
            logger=logger
        )
    else:
        utility_function_config = multi_objective_probability_of_improvement_utility_function_config_store.default
        utility_function = MultiObjectiveProbabilityOfImprovementUtilityFunction(
            function_config=utility_function_config,
            pareto_frontier=pareto_frontier,
            surrogate_model=model,
            logger=logger
        )

    return optimization_problem, model, utility_function, pareto_frontier
def test_hyperspheres(self, minimize, num_output_dimensions, num_points):
    """Uses a hypersphere to validate that ParetoFrontier can correctly identify pareto-optimal points."""
    hypersphere_radius = 10

    objective_function_config = Point(
        implementation=Hypersphere.__name__,
        hypersphere_config=Point(
            num_objectives=num_output_dimensions,
            minimize=minimize,
            radius=hypersphere_radius
        )
    )
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    optimization_problem = objective_function.default_optimization_problem
    random_params_df = optimization_problem.parameter_space.random_dataframe(num_points)

    # Let's randomly subsample 10% of points in random_params_df and make those points pareto optimal.
    #
    optimal_points_index = random_params_df.sample(
        frac=0.1,
        replace=False,
        axis='index'
    ).index

    random_params_df.loc[optimal_points_index, ['radius']] = hypersphere_radius
    objectives_df = objective_function.evaluate_dataframe(dataframe=random_params_df)

    # Conveniently, we can double check all of our math by invoking Pythagoras. Basically:
    #
    #   assert y0**2 + y1**2 + ... == radius**2
    #
    assert (np.power(objectives_df, 2).sum(axis=1) - np.power(random_params_df["radius"], 2) < 0.000001).all()

    # Just a few more sanity checks before we do the pareto computation.
    #
    if minimize == "all":
        assert (objectives_df <= 0).all().all()
    elif minimize == "none":
        assert (objectives_df >= 0).all().all()
    else:
        for column, minimize_column in zip(objectives_df, objective_function.minimize_mask):
            if minimize_column:
                assert (objectives_df[column] <= 0).all()
            else:
                assert (objectives_df[column] >= 0).all()

    pareto_frontier = ParetoFrontier(
        optimization_problem=optimization_problem,
        objectives_df=objectives_df,
        parameters_df=random_params_df
    )
    pareto_df = pareto_frontier.pareto_df

    # We know that all of the pareto efficient points must be on the frontier.
    #
    assert optimal_points_index.difference(pareto_df.index.intersection(optimal_points_index)).empty
    assert len(pareto_df.index) >= len(optimal_points_index)

    # If we flip all minimized objectives, we can assert on even more things.
    #
    for column, minimize_column in zip(objectives_df, objective_function.minimize_mask):
        if minimize_column:
            objectives_df[column] = -objectives_df[column]
            pareto_df[column] = -pareto_df[column]

    non_pareto_index = objectives_df.index.difference(pareto_df.index)
    for i, row in pareto_df.iterrows():
        # Now let's make sure that no point in pareto is dominated by any non-pareto point.
        #
        assert (objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

        # Let's also make sure that no point on the pareto is dominated by any other point there.
        #
        other_rows = pareto_df.index.difference([i])
        assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
def test_pareto_frontier_volume_on_hyperspheres(self, minimize, num_dimensions):
    """Uses a known formula for the volume of hyperspheres to validate the accuracy of the pareto frontier volume estimate.

    :return:
    """
    hypersphere_radius = 10
    inscribed_hypersphere_radius = 7  # For computing a lower bound on the volume.

    # In order to validate the estimates, we must know the allowable upper and lower bounds.
    # We know that the estimate should not be higher than the volume of the n-ball (ball in n dimensions).
    # We can also come up with a lower bound, by computing the volume of a slightly smaller ball inscribed
    # in the hypersphere. Note that the volume of an n-ball can be computed recursively, so we keep track
    # of n-ball volumes in lower dimensions.
    #
    upper_bounds_on_sphere_volume_by_num_dimensions = {}
    lower_bounds_on_sphere_volume_by_num_dimensions = {}

    # Compute the base cases for the recursion.
    #
    upper_bounds_on_sphere_volume_by_num_dimensions[2] = np.pi * (hypersphere_radius ** 2)
    upper_bounds_on_sphere_volume_by_num_dimensions[3] = (4 / 3) * np.pi * (hypersphere_radius ** 3)
    lower_bounds_on_sphere_volume_by_num_dimensions[2] = np.pi * (inscribed_hypersphere_radius ** 2)
    lower_bounds_on_sphere_volume_by_num_dimensions[3] = (4 / 3) * np.pi * (inscribed_hypersphere_radius ** 3)

    # Compute the recursive values.
    #
    for n in range(4, num_dimensions + 1):
        upper_bounds_on_sphere_volume_by_num_dimensions[n] = upper_bounds_on_sphere_volume_by_num_dimensions[n - 2] * 2 * np.pi * (hypersphere_radius ** 2) / n
        lower_bounds_on_sphere_volume_by_num_dimensions[n] = lower_bounds_on_sphere_volume_by_num_dimensions[n - 2] * 2 * np.pi * (inscribed_hypersphere_radius ** 2) / n

    objective_function_config = Point(
        implementation=Hypersphere.__name__,
        hypersphere_config=Point(
            num_objectives=num_dimensions,
            minimize=minimize,
            radius=hypersphere_radius
        )
    )
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config)
    parameter_space = objective_function.parameter_space

    num_points = max(4, num_dimensions)
    linspaces = []
    for dimension in parameter_space.dimensions:
        if dimension.name == 'radius':
            linspaces.append(np.array([hypersphere_radius]))
        else:
            linspaces.append(dimension.linspace(num_points))
    meshgrids = np.meshgrid(*linspaces)
    reshaped_meshgrids = [meshgrid.reshape(-1) for meshgrid in meshgrids]

    params_df = pd.DataFrame({
        dim_name: reshaped_meshgrids[i]
        for i, dim_name
        in enumerate(parameter_space.dimension_names)
    })

    objectives_df = objective_function.evaluate_dataframe(params_df)

    pareto_frontier = ParetoFrontier(
        optimization_problem=objective_function.default_optimization_problem,
        objectives_df=objectives_df,
        parameters_df=params_df
    )
    print("Num points in pareto frontier: ", len(objectives_df.index))
    assert len(pareto_frontier.pareto_df.index) == len(objectives_df.index)
    pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(num_samples=1000000)
    ci_lower_bound, ci_upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(alpha=0.05)

    lower_bound_on_pareto_volume = lower_bounds_on_sphere_volume_by_num_dimensions[num_dimensions] / (2 ** num_dimensions)
    upper_bound_on_pareto_volume = upper_bounds_on_sphere_volume_by_num_dimensions[num_dimensions] / (2 ** num_dimensions)
    print("True bounds:", lower_bound_on_pareto_volume, upper_bound_on_pareto_volume)
    print("CI bounds: ", ci_lower_bound, ci_upper_bound)
    assert lower_bound_on_pareto_volume <= ci_lower_bound <= ci_upper_bound <= upper_bound_on_pareto_volume
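# A quick standalone cross-check (illustrative, not part of the test) that the recursion used above,
# V_n = V_{n-2} * 2 * pi * r**2 / n, agrees with the closed-form n-ball volume
# V_n = pi**(n/2) * r**n / Gamma(n/2 + 1). The division by 2**num_dimensions in the test then restricts
# the ball volume to a single orthant.
import math

import numpy as np

r = 10
volumes = {2: np.pi * r ** 2, 3: (4 / 3) * np.pi * r ** 3}
for n in range(4, 11):
    volumes[n] = volumes[n - 2] * 2 * np.pi * (r ** 2) / n
    closed_form = np.pi ** (n / 2) * r ** n / math.gamma(n / 2 + 1)
    assert abs(volumes[n] - closed_form) / closed_form < 1e-12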
def __init__(self, optimization_problem: OptimizationProblem, optimizer_config: Point, logger=None):
    if logger is None:
        logger = create_logger("BayesianOptimizer")
    self.logger = logger

    # Let's initialize the optimizer.
    #
    OptimizerBase.__init__(self, optimization_problem)

    assert not optimization_problem.objective_space.is_hierarchical(), "Not supported."
    assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."

    self.surrogate_model_output_space = optimization_problem.objective_space
    self.optimizer_config = optimizer_config
    self.pareto_frontier: ParetoFrontier = ParetoFrontier(
        optimization_problem=self.optimization_problem,
        objectives_df=None
    )

    # Now let's put together the surrogate model.
    #
    assert self.optimizer_config.surrogate_model_implementation in (
        HomogeneousRandomForestRegressionModel.__name__,
        MultiObjectiveHomogeneousRandomForest.__name__
    )

    # Note that even if the user requested a HomogeneousRandomForestRegressionModel, we still create a MultiObjectiveRegressionModel
    # with just a single RandomForest inside it. This means we have to maintain only a single interface.
    #
    self.surrogate_model: MultiObjectiveRegressionModel = MultiObjectiveHomogeneousRandomForest(
        model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
        input_space=self.optimization_problem.feature_space,
        output_space=self.surrogate_model_output_space,
        logger=self.logger
    )

    # Now let's put together the experiment designer that will suggest parameters for each experiment.
    #
    assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
    self.experiment_designer = ExperimentDesigner(
        designer_config=self.optimizer_config.experiment_designer_config,
        optimization_problem=self.optimization_problem,
        pareto_frontier=self.pareto_frontier,
        surrogate_model=self.surrogate_model,
        logger=self.logger
    )

    self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
        surrogate_model_fit_state=self.surrogate_model.fit_state
    )

    # Also let's make sure we have the dataframes we need for the surrogate model.
    #
    self._parameter_names = [dimension.name for dimension in self.optimization_problem.parameter_space.dimensions]
    self._parameter_names_set = set(self._parameter_names)

    self._context_names = (
        [dimension.name for dimension in self.optimization_problem.context_space.dimensions]
        if self.optimization_problem.context_space
        else []
    )
    self._context_names_set = set(self._context_names)

    self._target_names = [dimension.name for dimension in self.optimization_problem.objective_space.dimensions]
    self._target_names_set = set(self._target_names)

    self._parameter_values_df = pd.DataFrame(columns=self._parameter_names)
    self._context_values_df = pd.DataFrame(columns=self._context_names)
    self._target_values_df = pd.DataFrame(columns=self._target_names)
def test_hyperspheres(self, minimize, num_output_dimensions, num_points):
    """Uses a hypersphere to validate that ParetoFrontier can correctly identify pareto-optimal points.

    The idea is that we want to find a pareto frontier that optimizes the cartesian coordinates of points defined
    using random spherical coordinates. By setting the radius of some of the points to the radius of the hypersphere,
    we guarantee that they are non-dominated. Such points must appear on the pareto frontier, though it's quite
    possible that other non-dominated points from the interior of the sphere could appear as well. The intuition
    in 2D is that we can draw a secant between two neighboring pareto efficient points on the perimeter. Any point
    that is between that secant and the perimeter is not dominated and would thus be pareto efficient as well.
    (Actually even more points are pareto efficient, but this subset is easiest to explain in text.)

    We want to test scenarios where:
        1) all objectives are maximized,
        2) all objectives are minimized,
        3) some objectives are maximized and some are minimized.

    We want to be able to do that for an arbitrary number of dimensions so as to extract maximum coverage from
    this simple test.

    How the test works
    ------------------
    For N objectives we will specify the following parameters:
        1. radius - distance of a point from the origin.
        2. theta0, theta1, ..., theta{i}, ..., theta{N-1} - the angle between the radius segment and the
           hyperplane containing unit vectors along y0, y1, ..., y{i-1}.

    And the following N objectives that are computed from the parameters:
        y0     = radius * cos(theta0)
        y1     = radius * sin(theta0) * cos(theta1)
        y2     = radius * sin(theta0) * sin(theta1) * cos(theta2)
        y3     = radius * sin(theta0) * sin(theta1) * sin(theta2) * cos(theta3)
        ...
        y{N-2} = radius * sin(theta0) * sin(theta1) * ... * sin(theta{N-2}) * cos(theta{N-1})
        y{N-1} = radius * sin(theta0) * sin(theta1) * ... * sin(theta{N-2}) * sin(theta{N-1})
                                                                              ^ !! sin instead of cos !!

    1) Maximizing all objectives. To maximize all objectives we need them to be non-negative. In such a setup
        all points with r == sphere_radius will be pareto efficient, and we can assert that the computed pareto
        frontier contains them. This can be guaranteed by keeping all angles theta in the first quadrant
        (0 .. pi/2), since both sin and cos are positive there, and thus so is their product.

    2) Minimizing all objectives. Similarly, to minimize all objectives we need them to be non-positive. In such
        a setup we know that all points with r == sphere_radius are pareto efficient and we can assert that they
        are returned in the computation. We observe that all objectives except for the last one contain any
        number of sin factors and a single cosine factor. Cosine is guaranteed to be negative in the second
        quadrant (pi/2 .. pi) and sine is guaranteed to be positive there. So keeping all thetas in the range
        [pi/2 .. pi] makes all objectives negative except for the last one (which we can simply flip manually).

    3) Maximizing some objectives while minimizing others. We can take advantage of the fact that every second
        objective has an odd number of sin factors, whilst the rest have an even number (again, except for the
        last one). So if we keep all sin factors negative, and all the cos factors positive, we get a neat
        situation of alternating objectives' signs. This is true in the fourth quadrant (3 * pi / 2 .. 2 * pi),
        where sin values are negative, and cos values are positive.

        The last objective - y{N-1} - will have N negative factors, so it will be positive if (N % 2) == 0 and
        negative otherwise. In other words:
            if (N % 2) == 0:
                maximize y{N-1}
            else:
                minimize y{N-1}

    :param self:
    :return:
    """
    hypersphere_radius = 10

    # Let's figure out the quadrant and which objectives to minimize.
    #
    theta_min = None
    theta_max = None
    minimize_mask: List[bool] = []

    if minimize == "all":
        # Let's keep angles in the second quadrant.
        #
        theta_min = math.pi / 2
        theta_max = math.pi
        minimize_mask = [True for _ in range(num_output_dimensions)]
    elif minimize == "none":
        # Let's keep all angles in the first quadrant.
        #
        theta_min = 0
        theta_max = math.pi / 2
        minimize_mask = [False for _ in range(num_output_dimensions)]
    elif minimize == "some":
        # Let's keep all angles in the fourth quadrant.
        #
        theta_min = 1.5 * math.pi
        theta_max = 2 * math.pi

        # Let's minimize the odd ones, that way y{N-1} doesn't require a sign flip.
        #
        minimize_mask = [(i % 2) == 1 for i in range(num_output_dimensions)]
    else:
        assert False

    # Let's put together the optimization problem.
    #
    parameter_dimensions = [ContinuousDimension(name="radius", min=0, max=hypersphere_radius)]
    for i in range(num_output_dimensions):
        parameter_dimensions.append(ContinuousDimension(name=f"theta{i}", min=theta_min, max=theta_max))

    parameter_space = SimpleHypergrid(
        name='spherical_coordinates',
        dimensions=parameter_dimensions
    )

    objective_space = SimpleHypergrid(
        name='rectangular_coordinates',
        dimensions=[
            ContinuousDimension(name=f"y{i}", min=0, max=hypersphere_radius)
            for i in range(num_output_dimensions)
        ]
    )

    optimization_problem = OptimizationProblem(
        parameter_space=parameter_space,
        objective_space=objective_space,
        objectives=[Objective(name=f'y{i}', minimize=minimize_objective) for i, minimize_objective in enumerate(minimize_mask)]
    )

    random_params_df = optimization_problem.feature_space.random_dataframe(num_points)

    # Let's randomly subsample 10% of points in random_params_df and make those points pareto optimal.
    #
    optimal_points_index = random_params_df.sample(
        frac=0.1,
        replace=False,
        axis='index'
    ).index

    random_params_df.loc[optimal_points_index, ['spherical_coordinates.radius']] = hypersphere_radius

    # We can compute our objectives more efficiently, by maintaining a prefix of r * sin(theta0) * ... * sin(theta{i-1}).
    #
    prefix = random_params_df['spherical_coordinates.radius']
    objectives_df = pd.DataFrame()
    for i in range(num_output_dimensions - 1):
        objectives_df[f'y{i}'] = prefix * np.cos(random_params_df[f'spherical_coordinates.theta{i}'])
        prefix = prefix * np.sin(random_params_df[f'spherical_coordinates.theta{i}'])

    # Conveniently, by the time the loop exits, the prefix is the value of our last objective.
    #
    if minimize == "all":
        # Must flip the prefix first, since there was no negative cosine to do it for us.
        #
        objectives_df[f'y{num_output_dimensions - 1}'] = -prefix
    else:
        objectives_df[f'y{num_output_dimensions - 1}'] = prefix

    # Just as conveniently, we can double check all of our math by invoking Pythagoras. Basically:
    #
    #   assert y0**2 + y1**2 + ... == radius**2
    #
    assert (np.power(objectives_df, 2).sum(axis=1) - np.power(random_params_df["spherical_coordinates.radius"], 2) < 0.000001).all()

    # Just a few more sanity checks before we do the pareto computation.
    #
    if minimize == "all":
        assert (objectives_df <= 0).all().all()
    elif minimize == "none":
        assert (objectives_df >= 0).all().all()
    else:
        for column, minimize_column in zip(objectives_df, minimize_mask):
            if minimize_column:
                assert (objectives_df[column] <= 0).all()
            else:
                assert (objectives_df[column] >= 0).all()

    pareto_df = ParetoFrontier.compute_pareto(
        optimization_problem=optimization_problem,
        objectives_df=objectives_df
    )

    # We know that all of the pareto efficient points must be on the frontier.
    #
    assert optimal_points_index.difference(pareto_df.index.intersection(optimal_points_index)).empty
    assert len(pareto_df.index) >= len(optimal_points_index)

    # If we flip all minimized objectives, we can assert on even more things.
    #
    for column, minimize_column in zip(objectives_df, minimize_mask):
        if minimize_column:
            objectives_df[column] = -objectives_df[column]
            pareto_df[column] = -pareto_df[column]

    non_pareto_index = objectives_df.index.difference(pareto_df.index)
    for i, row in pareto_df.iterrows():
        # Now let's make sure that no point in pareto is dominated by any non-pareto point.
        #
        assert (objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

        # Let's also make sure that no point on the pareto is dominated by any other point there.
        #
        other_rows = pareto_df.index.difference([i])
        assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
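# An illustrative standalone check (mirroring the objective loop above, not part of the test) of the
# sign pattern described in the docstring for N = 3 objectives with angles in the fourth quadrant,
# where sin(theta) < 0 and cos(theta) > 0: the objectives alternate in sign (+, -, +) and their squares
# still sum to radius**2.
import numpy as np

radius = 10.0
rng = np.random.default_rng(0)
theta0, theta1 = rng.uniform(1.5 * np.pi, 2 * np.pi, size=2)

y0 = radius * np.cos(theta0)                   # no sin factors   -> positive, maximize
y1 = radius * np.sin(theta0) * np.cos(theta1)  # one sin factor   -> negative, minimize
y2 = radius * np.sin(theta0) * np.sin(theta1)  # two sin factors  -> positive, maximize

assert y0 > 0 and y1 < 0 and y2 > 0
assert np.isclose(y0 ** 2 + y1 ** 2 + y2 ** 2, radius ** 2)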
def test_optimizers_against_untrained_models(self, objective_function_config_name, utility_function_type_name, utility_function_optimizer_type_name):
    """Tests that the utility function optimizers throw appropriate exceptions when the utility function cannot be evaluated.

    :return:
    """
    self.logger.info(
        f"Creating test artifacts for objective function: {objective_function_config_name}, "
        f"utility_function: {utility_function_type_name}, optimizer: {utility_function_optimizer_type_name}."
    )
    model_config = homogeneous_random_forest_config_store.default
    objective_function_config = objective_function_config_store.get_config_by_name(objective_function_config_name)
    objective_function = ObjectiveFunctionFactory.create_objective_function(objective_function_config=objective_function_config)
    optimization_problem = objective_function.default_optimization_problem

    model = MultiObjectiveHomogeneousRandomForest(
        model_config=model_config,
        input_space=optimization_problem.feature_space,
        output_space=optimization_problem.objective_space,
        logger=self.logger
    )

    pareto_frontier = ParetoFrontier(optimization_problem=optimization_problem)

    if utility_function_type_name == ConfidenceBoundUtilityFunction.__name__:
        utility_function_config = Point(utility_function_name="upper_confidence_bound_on_improvement", alpha=0.05)
        utility_function = ConfidenceBoundUtilityFunction(
            function_config=utility_function_config,
            surrogate_model=model,
            minimize=optimization_problem.objectives[0].minimize,
            logger=self.logger
        )
    elif utility_function_type_name == MultiObjectiveProbabilityOfImprovementUtilityFunction.__name__:
        utility_function_config = multi_objective_probability_of_improvement_utility_function_config_store.default
        utility_function = MultiObjectiveProbabilityOfImprovementUtilityFunction(
            function_config=utility_function_config,
            pareto_frontier=pareto_frontier,
            surrogate_model=model,
            logger=self.logger
        )
    else:
        assert False

    if utility_function_optimizer_type_name == RandomSearchOptimizer.__name__:
        utility_function_optimizer_config = random_search_optimizer_config_store.default
    elif utility_function_optimizer_type_name == GlowWormSwarmOptimizer.__name__:
        utility_function_optimizer_config = glow_worm_swarm_optimizer_config_store.default
    elif utility_function_optimizer_type_name == RandomNearIncumbentOptimizer.__name__:
        utility_function_optimizer_config = random_near_incumbent_optimizer_config_store.default
    else:
        assert False, f"Unknown utility_function_optimizer_type_name: {utility_function_optimizer_type_name}"

    utility_function_optimizer = UtilityFunctionOptimizerFactory.create_utility_function_optimizer(
        utility_function=utility_function,
        optimizer_type_name=utility_function_optimizer_type_name,
        optimizer_config=utility_function_optimizer_config,
        optimization_problem=optimization_problem,
        pareto_frontier=pareto_frontier,
        logger=self.logger
    )

    assert not model.trained

    self.logger.info("Asserting the optimizer is throwing appropriate exceptions.")
    num_failed_suggestions = 3
    for i in range(num_failed_suggestions):
        with pytest.raises(expected_exception=UnableToProduceGuidedSuggestionException):
            utility_function_optimizer.suggest()
        self.logger.info(f"[{i+1}/{num_failed_suggestions}] worked.")

    # Now let's train the model a bit and make sure that we can produce the suggestions afterwards.
    #
    random_params_df = optimization_problem.parameter_space.random_dataframe(1000)
    objectives_df = objective_function.evaluate_dataframe(random_params_df)
    features_df = optimization_problem.construct_feature_dataframe(parameters_df=random_params_df)

    self.logger.info("Training the model")
    model.fit(features_df=features_df, targets_df=objectives_df, iteration_number=1000)
    assert model.trained
    self.logger.info("Model trained.")

    self.logger.info("Updating pareto.")
    pareto_frontier.update_pareto(objectives_df=objectives_df, parameters_df=random_params_df)
    self.logger.info("Pareto updated.")

    self.logger.info("Asserting suggestions work.")
    num_successful_suggestions = 3
    for i in range(num_successful_suggestions):
        suggestion = utility_function_optimizer.suggest()
        assert suggestion in optimization_problem.parameter_space
        self.logger.info(f"[{i+1}/{num_successful_suggestions}] successfully produced suggestion: {suggestion}")

    self.logger.info(
        f"Done testing. Objective function: {objective_function_config_name}, "
        f"utility_function: {utility_function_type_name}, optimizer: {utility_function_optimizer_type_name}."
    )