import GPy
import numpy as np

from emukit.core import CategoricalParameter, ContinuousParameter, OneHotEncoding, ParameterSpace
from emukit.core.initial_designs.latin_design import LatinDesign
from emukit.experimental_design import ExperimentalDesignLoop
from emukit.model_wrappers import GPyModelWrapper


def test_categorical_variables():
    np.random.seed(123)

    def objective(x):
        return np.array(np.sum(x, axis=1).reshape(-1, 1))

    carol_spirits = ["past", "present", "yet to come"]
    encoding = OneHotEncoding(carol_spirits)
    parameter_space = ParameterSpace(
        [ContinuousParameter("real_param", 0.0, 1.0), CategoricalParameter("categorical_param", encoding)]
    )

    initial_design = LatinDesign(parameter_space)
    x_init = initial_design.get_samples(10)

    # One continuous column plus a 3-column one-hot block for the 3 categories.
    assert x_init.shape == (10, 4)
    # Every one-hot column (the block spans columns 1 through 3) must be exactly 0 or 1.
    assert np.all(np.logical_or(x_init[:, 1:] == 0.0, x_init[:, 1:] == 1.0))

    y_init = objective(x_init)

    gpy_model = GPy.models.GPRegression(x_init, y_init)
    gpy_model.Gaussian_noise.fix(1)
    model = GPyModelWrapper(gpy_model)

    loop = ExperimentalDesignLoop(parameter_space, model)
    loop.run_loop(objective, 5)

    # 10 initial points plus 5 collected by the loop.
    assert len(loop.loop_state.Y) == 15
    assert np.all(np.logical_or(loop.loop_state.X[:, 1:] == 0.0, loop.loop_state.X[:, 1:] == 1.0))
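# A companion sketch to the test above (plain numpy only, no extra emukit API
# assumed): column 0 of each sample is the continuous parameter and columns 1:
# are the one-hot block, so an argmax over that block recovers the category.
# decode_categories is a hypothetical helper, not part of emukit.
def decode_categories(x, categories=("past", "present", "yet to come")):
    # argmax over the one-hot columns yields each row's category index
    return [categories[i] for i in np.argmax(x[:, 1:], axis=1)]


# e.g. decode_categories(x_init) returns one spirit label per sampled row.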
from emukit.core import ParameterSpace
from emukit.core.initial_designs.latin_design import LatinDesign
from emukit.core.initial_designs.random_design import RandomDesign
from emukit.core.initial_designs.sobol_design import SobolDesign


def create_model_free_designs(space: ParameterSpace):
    """Return one instance of each model-free initial design over the given space."""
    return [RandomDesign(space), LatinDesign(space), SobolDesign(space)]
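# Hedged usage sketch for create_model_free_designs: the two-dimensional space
# below is illustrative. All three designs share the get_samples interface and
# return arrays shaped (n_points, n_dims).
from emukit.core import ContinuousParameter

example_space = ParameterSpace(
    [ContinuousParameter("x1", 0.0, 1.0), ContinuousParameter("x2", -1.0, 1.0)]
)
for design in create_model_free_designs(example_space):
    samples = design.get_samples(8)
    assert samples.shape == (8, 2)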
import numpy as np

from emukit.core import ParameterSpace
from emukit.core.initial_designs.latin_design import LatinDesign
from emukit.core.loop import FixedIterationsStoppingCondition, LoopState, UserFunctionWrapper
from emukit.examples.fabolas.fabolas_loop import FabolasLoop


def fmin_fabolas(
    func,
    space: ParameterSpace,
    s_min: float,
    s_max: float,
    n_iters: int,
    n_init: int = 20,
    marginalize_hypers: bool = True,
) -> LoopState:
    """
    Simple interface for Fabolas, which optimizes the hyperparameters of machine learning
    algorithms by reasoning across subsets of the training data set. For further details see:

        Fast Bayesian hyperparameter optimization on large datasets
        A. Klein and S. Falkner and S. Bartels and P. Hennig and F. Hutter
        Electronic Journal of Statistics (2017)

    :param func: objective function which takes a hyperparameter configuration x and a training
                 dataset size s as input, and returns the validation error and the runtime after
                 training x on s datapoints
    :param space: input space
    :param s_min: minimum training dataset size (linear scale)
    :param s_max: maximum training dataset size (linear scale)
    :param n_iters: number of iterations
    :param n_init: number of initial design points (needs to be smaller than n_iters)
    :param marginalize_hypers: determines whether to use a MAP estimate or to marginalize
                               over the GP hyperparameters
    :return: LoopState with all evaluated data points
    """
    initial_design = LatinDesign(space)
    grid = initial_design.get_samples(n_init)
    X_init = np.zeros([n_init, grid.shape[1] + 1])
    Y_init = np.zeros([n_init, 1])
    cost_init = np.zeros([n_init])

    # Schedule of training subset sizes s_max / 512, ..., s_max / 4 in ascending
    # order; any entry below s_min is clipped up to s_min.
    subsets = np.array([s_max // 2**i for i in range(2, 10)])[::-1]
    idx = np.where(subsets < s_min)[0]
    subsets[idx] = s_min

    # Evaluate the initial design, cycling through the subset sizes.
    for it in range(n_init):
        func_val, cost = func(x=grid[it], s=subsets[it % len(subsets)])

        X_init[it] = np.concatenate((grid[it], np.array([subsets[it % len(subsets)]])))
        Y_init[it] = func_val
        cost_init[it] = cost

    def wrapper(x):
        # The loop optimizes the dataset size on a log scale; exp converts the
        # candidate's last column back to a linear-scale subset size.
        y, c = func(x[0, :-1], np.exp(x[0, -1]))
        return np.array([[y]]), np.array([[c]])

    loop = FabolasLoop(
        X_init=X_init,
        Y_init=Y_init,
        cost_init=cost_init,
        space=space,
        s_min=s_min,
        s_max=s_max,
        marginalize_hypers=marginalize_hypers,
    )
    loop.run_loop(
        user_function=UserFunctionWrapper(wrapper),
        stopping_condition=FixedIterationsStoppingCondition(n_iters - n_init),
    )

    return loop.loop_state
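# Hedged usage sketch for fmin_fabolas: the objective below is synthetic and
# purely illustrative. Its "validation error" shrinks as the training subset
# size s grows and its "runtime" grows linearly in s, mimicking the trade-off
# Fabolas exploits. All names here are the sketch's own, not emukit API.
from emukit.core import ContinuousParameter

hyperparameter_space = ParameterSpace([ContinuousParameter("log_lr", -6.0, 0.0)])


def synthetic_objective(x, s):
    validation_error = float((x[0] + 3.0) ** 2) + 1.0 / np.sqrt(float(s))
    runtime = 1e-4 * float(s)
    return validation_error, runtime


loop_state = fmin_fabolas(
    synthetic_objective, hyperparameter_space, s_min=100, s_max=10000, n_iters=30
)
print(loop_state.X.shape, loop_state.Y.shape)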