Example #1
File: gp.py  Project: winkywow/tframe
def _get_gp_est(space, **kwargs):
    import numpy as np  # needed for np.ones below
    from skopt.utils import Space
    from skopt.utils import normalize_dimensions
    from skopt.utils import ConstantKernel, HammingKernel, Matern
    from skopt.learning import GaussianProcessRegressor

    # Set space
    space = Space(space)
    space = Space(normalize_dimensions(space.dimensions))
    n_dims = space.transformed_n_dims

    cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
    # If all dimensions are categorical, use Hamming kernel
    if space.is_categorical:
        other_kernel = HammingKernel(length_scale=np.ones(n_dims))
    else:
        other_kernel = Matern(length_scale=np.ones(n_dims),
                              length_scale_bounds=[(0.01, 100)] * n_dims,
                              nu=2.5)

    base_estimator = GaussianProcessRegressor(kernel=cov_amplitude *
                                              other_kernel,
                                              normalize_y=True,
                                              noise="gaussian",
                                              n_restarts_optimizer=2)

    base_estimator.set_params(**kwargs)
    return base_estimator
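A minimal usage sketch for the helper above (assuming skopt is installed; the mixed search space is illustrative):

from skopt import Optimizer

space = [(0.0, 1.0), (1, 10), ("adam", "sgd")]   # real, integer, categorical
gp = _get_gp_est(space, n_restarts_optimizer=4)  # kwargs go to set_params
opt = Optimizer(space, base_estimator=gp, n_initial_points=5)
x = opt.ask()        # next point to evaluate
opt.tell(x, 0.42)    # report a (dummy) objective value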
Example #2
 def prepareScoring(
     self, spaceSpec: typing.Iterable[typing.Union[
         "skopt.space.space.Real", "skopt.space.space.Integer",
         "skopt.space.space.Categorical"]]
 ) -> typing.Tuple[int, str, "skopt.Optimizer"]:
     from skopt.utils import cook_estimator, normalize_dimensions
     normalized = normalize_dimensions(spaceSpec)
     base_estimator = cook_estimator(self.__class__.skoptAlgo,
                                     space=normalized,
                                     random_state=None)
     optimizer = skopt.Optimizer(
         normalized,
         base_estimator,
         n_initial_points=0,
         acq_func=self.acquisitionType,
         acq_optimizer=self.acquisitionOptimizerType,
         acq_optimizer_kwargs={
             "n_points": self.iters,
             "n_restarts_optimizer": self.nRestartsOptimizer,
             "n_jobs": self.jobs
         },
         acq_func_kwargs={
             "xi": self.chi,
             "kappa": self.kappa
         })
     return (self.iters, "SKOpt (" + self.__class__.skoptAlgo + ")",
             optimizer)
Example #3
    def setup_tuner(self):
        self.tunecfg = self.experiment["tuner"]
        self.parameters = list(self.tunecfg["parameters"].keys())
        self.dimensions = self.parse_dimensions(self.tunecfg["parameters"])
        self.space = normalize_dimensions(self.dimensions)
        self.priors = self.parse_priors(self.tunecfg["priors"])

        self.kernel = ConstantKernel(
            constant_value=self.tunecfg.get("variance_value", 0.1**2),
            constant_value_bounds=tuple(
                self.tunecfg.get("variance_bounds", (0.01**2, 0.5**2))),
        ) * Matern(
            length_scale=self.tunecfg.get("length_scale_value", 0.3),
            length_scale_bounds=tuple(
                self.tunecfg.get("length_scale_bounds", (0.2, 0.8))),
            nu=2.5,
        )
        self.opt = Optimizer(
            dimensions=self.dimensions,
            n_points=self.tunecfg.get("n_points", 1000),
            n_initial_points=self.tunecfg.get("n_initial_points",
                                              5 * len(self.dimensions)),
            gp_kernel=self.kernel,
            gp_kwargs=dict(normalize_y=True),
            gp_priors=self.priors,
            acq_func=self.tunecfg.get("acq_func", "ts"),
            acq_func_kwargs=self.tunecfg.get(
                "acq_func_kwargs",
                None),  # TODO: Check if this works for all parameters
            random_state=self.rng.randint(0,
                                          np.iinfo(np.int32).max),
        )
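A hedged sketch of the experiment["tuner"] configuration this method reads; the key names come from the lookups above, while the value formats of "parameters" and "priors" depend on the project's own parse_dimensions and parse_priors, so they are only placeholders:

tunecfg = {
    "parameters": {},  # name -> range spec, consumed by parse_dimensions
    "priors": {},      # consumed by parse_priors
    "variance_value": 0.1 ** 2,                # ConstantKernel value
    "variance_bounds": (0.01 ** 2, 0.5 ** 2),  # ConstantKernel bounds
    "length_scale_value": 0.3,                 # Matern length scale
    "length_scale_bounds": (0.2, 0.8),
    "n_points": 1000,
    "n_initial_points": 10,
    "acq_func": "ts",
    "acq_func_kwargs": None,
}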
Example #4
    def __init__(
        self,
        dimensions,
        n_points=500,
        n_initial_points=10,
        init_strategy="r2",
        gp_kernel=None,
        gp_kwargs=None,
        gp_priors=None,
        acq_func="pvrs",
        acq_func_kwargs=None,
        random_state=None,
        **kwargs
    ):
        self.rng = check_random_state(random_state)

        if callable(acq_func):
            self.acq_func = acq_func
        else:
            self.acq_func = ACQUISITION_FUNC[acq_func]
        if acq_func_kwargs is None:
            acq_func_kwargs = dict()
        self.acq_func_kwargs = acq_func_kwargs

        self.space = normalize_dimensions(dimensions)
        self._n_initial_points = n_initial_points
        self.n_initial_points_ = n_initial_points
        self.init_strategy = init_strategy
        if self.init_strategy == "r2":
            self._initial_points = self.space.inverse_transform(
                r2_sequence(n=n_initial_points, d=self.space.n_dims)
            )
        self.n_points = n_points

        if gp_kwargs is None:
            gp_kwargs = dict()
        if gp_kernel is None:
            # For now the default kernel is not adapted to the dimensions,
            # which is why a simple list is passed:
            gp_kernel = construct_default_kernel(
                list(range(self.space.transformed_n_dims))
            )

        self.gp = BayesGPR(
            kernel=gp_kernel,
            random_state=self.rng.randint(0, np.iinfo(np.int32).max),
            **gp_kwargs,
        )
        # We are only able to guess priors now, since BayesGPR can add
        # another WhiteKernel, when noise is set to "gaussian":
        if gp_priors is None:
            gp_priors = guess_priors(self.gp.kernel)
        self.gp_priors = gp_priors

        self.Xi = []
        self.yi = []
        self.noisei = []
        self._next_x = None
Example #5
    def __init__(
        self,
        dimensions,
        n_points=500,
        n_initial_points=10,
        init_strategy="sb",
        gp_kernel=None,
        gp_kwargs=None,
        gp_priors=None,
        acq_func="pvrs",
        acq_func_kwargs=None,
        random_state=None,
        **kwargs,
    ):
        self.rng = check_random_state(random_state)

        if callable(acq_func):
            self.acq_func = acq_func
        else:
            self.acq_func = ACQUISITION_FUNC[acq_func]
        if acq_func_kwargs is None:
            acq_func_kwargs = {}
        self.acq_func_kwargs = acq_func_kwargs

        self.space = normalize_dimensions(dimensions)
        self._n_initial_points = n_initial_points
        self.n_initial_points_ = n_initial_points
        self.init_strategy = init_strategy
        if self.init_strategy == "r2":
            self._initial_points = self.space.inverse_transform(
                r2_sequence(n=n_initial_points, d=self.space.n_dims)
            )
        elif self.init_strategy == "sb":
            self._init_rng = np.random.RandomState(self.rng.randint(2 ** 31))
        self.n_points = n_points

        if gp_kwargs is None:
            gp_kwargs = {}
        if gp_kernel is None:
            # For now the default kernel is not adapted to the dimensions,
            # which is why a simple list is passed:
            gp_kernel = construct_default_kernel(
                list(range(self.space.transformed_n_dims))
            )

        self.gp = BayesGPR(
            kernel=gp_kernel,
            random_state=self.rng.randint(0, np.iinfo(np.int32).max),
            **gp_kwargs,
        )
        self.gp_priors = gp_priors

        self.Xi = []
        self.yi = []
        self.noisei = []
        self._next_x = None
Example #6
def test_reduce_ranges():
    space = normalize_dimensions([(0.0, 1.0), ("a", "b", "c")])
    x = ((0.0, "a"), (1.01, "a"), (0.5, "d"), (1.0, "c"))
    y = (0.0, 1.0, 2.0, 3.0)
    noise = (0.1, 0.2, 0.3, 0.4)
    reduction_needed, x_new, y_new, noise_new = reduce_ranges(x, y, noise, space)
    assert reduction_needed
    assert tuple(x_new) == ((0.0, "a"), (1.0, "c"))
    assert tuple(y_new) == (0.0, 3.0)
    assert tuple(noise_new) == (0.1, 0.4)
Example #7
def test_normalize_bounds():
    bounds = [(-999, 189000), Categorical((True, False))]
    space = Space(normalize_dimensions(bounds))
    for a in np.linspace(1e-9, 0.4999, 1000):
        x = space.inverse_transform([[a, a]])
        check_limits(x[0][0], -999, 189000)
        y = space.transform(x)
        check_limits(y, 0., 1.)
    for a in np.linspace(0.50001, 1e-9 + 1., 1000):
        x = space.inverse_transform([[a, a]])
        check_limits(x[0][0], -999, 189000)
        y = space.transform(x)
        check_limits(y, 0., 1.)
Example #8
def create_opt(lines, ranker_name):
    gp_seed, opt_seed = get_seed(lines)
    _ranker_class = object_rankers[ranker_name]
    _ranker_class._use_early_stopping = True
    param_ranges = _ranker_class.set_tunable_parameter_ranges({})
    transformed = []
    for param in param_ranges:
        transformed.append(check_dimension(param))
    space = normalize_dimensions(transformed)
    base_estimator = cook_estimator("GP",
                                    space=space,
                                    random_state=gp_seed,
                                    noise="gaussian")
    optimizer = Optimizer(dimensions=param_ranges,
                          random_state=opt_seed,
                          base_estimator=base_estimator)
    return optimizer
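Note the pattern shared by most examples on this page: normalize_dimensions turns raw range specifications into a normalized Space, whose transformed_n_dims lets cook_estimator size the GP kernel correctly, while the raw, untransformed ranges are still the ones handed to Optimizer.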
Example #9
    def set_optimizer(self, n_iter, opt_seed, acq_func, gp_seed, **kwargs):
        self.logger.info('Retrieving model stored at: {}'.format(self.optimizer_path))
        try:
            optimizer = load(self.optimizer_path)
            self.logger.info('Loading model stored at: {}'.format(self.optimizer_path))
            finished_iter = np.array(optimizer.yi).shape[0]
            if finished_iter == 0:
                optimizer = None
                self.logger.info('Optimizer did not finish any iterations, so setting optimizer to None')
        except (KeyError, ValueError):
            self.logger.error('Cannot open the file {}'.format(self.optimizer_path))
            optimizer = None
        except FileNotFoundError:
            self.logger.error('No such file or directory: {}'.format(self.optimizer_path))
            optimizer = None

        if optimizer is not None:
            n_iter = n_iter - finished_iter
            if n_iter < 0:
                n_iter = 0
            self.logger.info('Iterations already done: {} and running iterations {}'.format(finished_iter, n_iter))
            self.opt = optimizer
            self.logger.debug('Setting the provided optimizer')
            self.log_best_params()
        else:
            transformed = []
            for param in self.parameter_ranges:
                transformed.append(check_dimension(param))
            self.logger.info("Parameter Space: {}".format(transformed))
            norm_space = normalize_dimensions(transformed)
            self.logger.info("Parameter Space after transformation: {}".format(norm_space))
            categorical_space = np.array([isinstance(s, Categorical) for s in norm_space])
            self.logger.info("categorical_space: {}".format(categorical_space))
            if np.all(categorical_space):
                base_estimator = cook_estimator("RF", space=norm_space, random_state=gp_seed)
            else:
                base_estimator = cook_estimator("GP", space=norm_space, random_state=gp_seed, noise="gaussian")

            self.opt = Optimizer(dimensions=self.parameter_ranges, random_state=opt_seed, base_estimator=base_estimator,
                                 acq_func=acq_func, **kwargs)

        return n_iter
Example #10
def get_x0(flat_base_config, search_space):
    """ Extract a default point from the base configuration,
    replacing any invalid params.
    """
    x0 = [ flat_base_config.get(clean_nested_key(k), 'None') for k in search_space ]

    # Check x0 is in the space before running it
    # and coerce it into the search space with random samples where invalid
    dimensions = list(search_space.values())
    space = Space(normalize_dimensions(dimensions))
    for i, (p, d) in enumerate(zip(x0, space.dimensions)):
        if p not in d:
            sample = d.rvs()
            print(f"{p} not in dimension: {d} with name:{d.name}, setting to sample:{sample}")
            logger.info(f"{p} not in dimension: {d} with name:{d.name}, setting to sample:{sample}")
            x0[i] = sample
    print('x0', x0)
    print('space', space)
    return x0
Example #11
def _patched_gp_base_estimator(dimensions, random_state, noise):
    """Returns a GP base estimator that does not normalize y."""
    import numpy as np
    import skopt
    from sklearn.utils import check_random_state
    from skopt.utils import normalize_dimensions

    space = normalize_dimensions(dimensions)
    rng = check_random_state(random_state)

    estimator = skopt.utils.cook_estimator(
        "GP",
        space=space,
        random_state=rng.randint(0,
                                 np.iinfo(np.int32).max),
        noise=noise,
    )
    # The point of this function - setting normalize_y to False.
    estimator.normalize_y = False
    return estimator
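A minimal sketch of what the patch buys you (assuming skopt is installed; the search space is illustrative):

est = _patched_gp_base_estimator(
    dimensions=[(0.0, 1.0), (1, 10)],
    random_state=42,
    noise="gaussian",
)
assert est.normalize_y is False  # the whole point of the patch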
Example #12
    def __init__(self,
                 hyper_param_conf,
                 command,
                 expdir,
                 exp_recipe_dir,
                 recipe,
                 computing,
                 exp_proposal_watch_dir=None):
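        # 'GP' is hard-coded here; setting this to 'boundedGP' activates the
        # custom BoundedGaussianProcessRegressor branch further below.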
        base_estimator = 'GP'

        self.hyper_param_conf = hyper_param_conf
        self.command = command
        self.expdir = expdir
        self.exp_recipe_dir = exp_recipe_dir
        self.recipe = recipe
        self.computing = computing

        # read the hyper parameter file
        hyper_param_cfg = configparser.ConfigParser()
        hyper_param_cfg.read(hyper_param_conf)

        hyper_info = dict(hyper_param_cfg.items('info'))
        self.hyper_param_names = hyper_info['hyper_params'].split(' ')
        self.num_iters = int(hyper_info['num_iters'])
        self.n_initial_points = int(hyper_info['n_initial_points'])
        self.n_initial_points_to_start = int(
            hyper_info['n_initial_points_to_start'])
        self.max_parallel_jobs = int(hyper_info['max_parallel_jobs'])
        self.selected_segment_length = hyper_info['segment_length']
        self.selected_task = hyper_info['task']

        if 'adapt_hyper_param' in hyper_info:
            self.adapt_param = {
                'param_name': hyper_info['adapt_hyper_param'],
                'param_thr': int(hyper_info['param_thr']),
                'par_cnt_scheme': hyper_info['par_cnt_scheme']
            }
        else:
            self.adapt_param = None

        hyper_param_dict = dict()
        skopt_dims = []
        for par_name in self.hyper_param_names:
            par_dict = dict(hyper_param_cfg.items(par_name))
            par_type = par_dict['type']
            if par_type == 'Integer':
                skopt_dim = skopt_space.Integer(low=int(par_dict['min']),
                                                high=int(par_dict['max']),
                                                name=par_name)

            elif par_type == 'Real':
                skopt_dim = skopt_space.Real(low=float(par_dict['min']),
                                             high=float(par_dict['max']),
                                             name=par_name)

            elif par_type == 'Categorical':
                skopt_dim = skopt_space.Categorical(
                    categories=par_dict['categories'].split(' '),
                    name=par_name)

            else:
                raise ValueError('Type %s is not a valid parameter type' %
                                 par_type)

            hyper_param_dict[par_name] = par_dict
            skopt_dims.append(skopt_dim)

        self.hyper_param_dict = hyper_param_dict
        self.skopt_dims = skopt_dims

        self.last_result = None
        # self.all_results = []

        self.start_new_run_flag = True
        self.iter_ind = 0
        self.watch_list = dict()
        self.all_dim_values = []
        self.all_losses = dict()
        self.n_job_running = 0
        self.n_initial_points_started = 0
        self.n_unsuitable_points_for_estimator = 0
        self.max_n_unsuitable_points_for_estimator = 10000
        self.unsuitable_runs = []
        self.lost_runs = []

        self.exp_proposal_watch_dir = exp_proposal_watch_dir
        self.use_proposal_run = False
        self.proposed_loss_runs = []

        # Only ~0.25% of the sampled points in the hyperparameter space are
        # wanted (since they lead to roughly the desired number of trainable
        # parameters).
        self.acq_optimizer_kwargs = {'n_points': 4000000}
        if 'debug' in expdir:
            self.acq_optimizer_kwargs = {'n_points': 40000}

        if base_estimator == 'boundedGP':
            # Make own estimator based on Gaussian Process Regressor.
            if skopt_dims is not None:
                space = Space(skopt_dims)
                space = Space(normalize_dimensions(space.dimensions))
                n_dims = space.transformed_n_dims
                is_cat = space.is_categorical

            else:
                raise ValueError("Expected a Space instance, not None.")

            cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
            # only special if *all* dimensions are categorical
            if is_cat:
                other_kernel = HammingKernel(length_scale=np.ones(n_dims))
            else:
                other_kernel = Matern(length_scale=np.ones(n_dims),
                                      length_scale_bounds=[(0.01, 100)] *
                                      n_dims,
                                      nu=2.5)

            base_estimator = BoundedGaussianProcessRegressor(
                space,
                self.hyper_param_names,
                self.adapt_param,
                kernel=cov_amplitude * other_kernel,
                normalize_y=True,
                noise="gaussian",
                n_restarts_optimizer=2)

        super(HyperParamOptimizer,
              self).__init__(skopt_dims,
                             base_estimator=base_estimator,
                             n_initial_points=self.n_initial_points,
                             acq_optimizer_kwargs=self.acq_optimizer_kwargs)
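A hedged sketch of the INI-style hyperparameter file this constructor parses; the section and key names are taken from the lookups above, the values are illustrative:

[info]
hyper_params = lr num_layers optimizer
num_iters = 50
n_initial_points = 10
n_initial_points_to_start = 5
max_parallel_jobs = 4
segment_length = full
task = train

[lr]
type = Real
min = 0.0001
max = 0.1

[num_layers]
type = Integer
min = 1
max = 8

[optimizer]
type = Categorical
categories = adam sgd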
Example #13
def test_normalize_dimensions_all_categorical():
    dimensions = (['a', 'b', 'c'], ['1', '2', '3'])
    space = normalize_dimensions(dimensions)
    assert space.is_categorical
Example #14
    def fit(self,
            X,
            Y,
            total_duration=6e7,
            n_iter=100,
            cv_iter=None,
            optimizer=None,
            acq_func='gp_hedge',
            **kwargs):
        start = datetime.now()

        def splitter(itr):
            for train_idx, test_idx in itr:
                yield X[train_idx], Y[train_idx], X[test_idx], Y[test_idx]

        def splitter_dict(itr_dict):

            n_splits = len(list(itr_dict.values())[0])
            for i in range(n_splits):
                X_train = dict()
                Y_train = dict()
                X_test = dict()
                Y_test = dict()
                for n_obj, itr in itr_dict.items():
                    train_idx = itr[i][0]
                    test_idx = itr[i][1]
                    X_train[n_obj] = np.copy(X[n_obj][train_idx])
                    X_test[n_obj] = np.copy(X[n_obj][test_idx])
                    Y_train[n_obj] = np.copy(Y[n_obj][train_idx])
                    Y_test[n_obj] = np.copy(Y[n_obj][test_idx])
                yield X_train, Y_train, X_test, Y_test

        if cv_iter is None:
            cv_iter = ShuffleSplit(n_splits=3,
                                   test_size=0.1,
                                   random_state=self.random_state)
        if isinstance(X, dict):
            splits = dict()
            for n_obj, arr in X.items():
                if arr.shape[0] == 1:
                    splits[n_obj] = [([0], [0])
                                     for i in range(cv_iter.n_splits)]
                else:
                    splits[n_obj] = list(cv_iter.split(arr))
        else:
            splits = list(cv_iter.split(X))
        # Pre-compute splits for reuse
        # Here we fix a random seed for all simulations to correlate the random
        # streams:

        seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug(
            'Random seed for the ranking algorithm: {}'.format(seed))
        opt_seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug('Random seed for the optimizer: {}'.format(opt_seed))
        gp_seed = self.random_state.randint(2**32, dtype='uint32')
        self.logger.debug(
            'Random seed for the GP surrogate: {}'.format(gp_seed))

        if optimizer is not None:
            opt = optimizer
            self.logger.debug('Setting the provided optimizer')
            self.log_best_params(opt)
        else:
            transformed = []
            for param in self.parameter_ranges:
                transformed.append(check_dimension(param))
            self.logger.info("Parameter Space: {}".format(transformed))
            space = normalize_dimensions(transformed)
            self.logger.info(
                "Parameter Space after transformation: {}".format(space))

            # Todo: Make this passable
            base_estimator = cook_estimator("GP",
                                            space=space,
                                            random_state=gp_seed,
                                            noise="gaussian")
            opt = Optimizer(dimensions=self.parameter_ranges,
                            random_state=opt_seed,
                            base_estimator=base_estimator,
                            acq_func=acq_func,
                            **kwargs)
        self._callbacks_set_optimizer(opt)
        self._callbacks_on_optimization_begin()
        time_taken = duration_tillnow(start)
        total_duration -= time_taken
        max_fit_duration = -10000
        self.logger.info('Time left for {} iterations is {}'.format(
            n_iter, microsec_to_time(total_duration)))

        try:
            for t in range(n_iter):
                start = datetime.now()
                self._callbacks_on_iteration_begin(t)
                self.logger.info(
                    'Starting optimization iteration: {}'.format(t))
                if t > 0:
                    self.log_best_params(opt)

                next_point = opt.ask()
                self.logger.info('Next parameters:\n{}'.format(next_point))
                results = []
                running_times = []
                if isinstance(X, dict):
                    for X_train, Y_train, X_test, Y_test in splitter_dict(
                            splits):
                        result, time_taken = self._fit_ranker(
                            X_train, Y_train, X_test, Y_test, next_point)
                        running_times.append(time_taken)
                        results.append(result)
                else:
                    for X_train, Y_train, X_test, Y_test in splitter(splits):
                        result, time_taken = self._fit_ranker(
                            X_train, Y_train, X_test, Y_test, next_point)
                        running_times.append(time_taken)
                        results.append(result)

                results = np.array(results)
                running_times = np.array(running_times)
                mean_result = np.mean(results)
                mean_fitting_duration = np.mean(running_times)

                # Track the longest time needed to fit and evaluate the splits,
                # including out-of-sample evaluation
                if max_fit_duration < np.sum(running_times):
                    max_fit_duration = np.sum(running_times)

                self.logger.info(
                    'Validation error for the parameters is {:.4f}'.format(
                        mean_result))
                self.logger.info('Time taken for the parameters is {}'.format(
                    microsec_to_time(np.sum(running_times))))
                if "ps" in opt.acq_func:
                    opt.tell(next_point, [mean_result, mean_fitting_duration])
                else:
                    opt.tell(next_point, mean_result)
                self._callbacks_on_iteration_end(t)

                self.logger.info(
                    "Main optimizer iterations done {} and saving the model".
                    format(np.array(opt.yi).shape[0]))
                dump(opt, self.optimizer_path)

                time_taken = duration_tillnow(start)
                total_duration -= time_taken
                self.logger.info('Time left for simulations is {} '.format(
                    microsec_to_time(total_duration)))

                if (total_duration - max_fit_duration) < 0:
                    self.logger.info(
                        'Maximum time required by the model to validate a '
                        'parameter setting: {}'.format(
                            microsec_to_time(max_fit_duration)))
                    self.logger.info(
                        'Stopping the simulation at iteration {} due to '
                        'insufficient remaining time'.format(t))
                    break

        except KeyboardInterrupt:
            self.logger.debug(
                'Optimizer interrupted; saving the model at {}'.format(
                    self.optimizer_path))
            self.log_best_params(opt)
        else:
            self.logger.debug(
                'Finally, fitting a model on the complete training set and storing it at {}'
                .format(self.optimizer_path))
            self._fit_params["epochs"] = self._fit_params.get("epochs", 1000)
            if "ps" in opt.acq_func:
                best_point = opt.Xi[np.argmin(np.array(opt.yi)[:, 0])]
            else:
                best_point = opt.Xi[np.argmin(opt.yi)]
            self._set_new_parameters(best_point)
            self.model = copy.copy(self.ranker)
            self.model.fit(X, Y, **self._fit_params)

        finally:
            self._callbacks_on_optimization_end()
            self.optimizer = opt
            if np.array(opt.yi).shape[0] != 0:
                dump(opt, self.optimizer_path)
Example #15
def test_categoricals_mixed_types():
    domain = [[1, 2, 3, 4], ['a', 'b', 'c'], [True, False]]
    x = [1, 'a', True]
    space = normalize_dimensions(domain)
    assert (space.inverse_transform(space.transform([x])) == [x])
Example #16
    def run(self):
        """start the tuning process"""
        def objective(var):
            """objective method receive the benchmark result and send the next parameters"""
            iter_result = {}
            option = []
            for i, knob in enumerate(self.knobs):
                params[knob['name']] = var[i]
                if knob['dtype'] == 'string':
                    option.append(knob['options'].index(var[i]))
                else:
                    option.append(var[i])

            iter_result["param"] = params
            self.child_conn.send(iter_result)
            result = self.child_conn.recv()
            x_num = 0.0
            eval_list = result.split(',')
            for value in eval_list:
                num = float(value)
                x_num = x_num + num
            options.append(option)
            performance.append(x_num)
            return x_num

        params = {}
        options = []
        performance = []
        labels = []
        estimator = None

        try:
            if self.engine in ('random', 'forest', 'gbrt', 'bayes', 'extraTrees'):
                params_space = self.build_space()
                ref_x, ref_y = self.transfer()
                if len(ref_x) == 0:
                    if len(self.ref) == 0:
                        ref_x = None
                    else:
                        ref_x = self.ref
                    ref_y = None
                if ref_x is not None and not isinstance(
                        ref_x[0], (list, tuple)):
                    ref_x = [ref_x]
                LOGGER.info('x0: %s', ref_x)
                LOGGER.info('y0: %s', ref_y)

                if ref_x is not None and isinstance(ref_x[0], (list, tuple)):
                    self._n_random_starts = 0 if len(ref_x) >= self._n_random_starts \
                        else self._n_random_starts - len(ref_x) + 1

                LOGGER.info('n_random_starts parameter is: %d',
                            self._n_random_starts)
                LOGGER.info("Running performance evaluation.......")
                if self.engine == 'random':
                    estimator = 'dummy'
                elif self.engine == 'forest':
                    estimator = 'RF'
                elif self.engine == 'extraTrees':
                    estimator = 'ET'
                elif self.engine == 'gbrt':
                    estimator = 'GBRT'
                elif self.engine == 'bayes':
                    params_space = normalize_dimensions(params_space)
                    estimator = cook_estimator("GP",
                                               space=params_space,
                                               noise=self.noise)

                LOGGER.info("base_estimator is: %s", estimator)
                optimizer = baseOpt(dimensions=params_space,
                                    n_random_starts=self._n_random_starts,
                                    random_state=1,
                                    base_estimator=estimator)
                n_calls = self.max_eval
                # User suggested points at which to evaluate the objective first
                if ref_x and ref_y is None:
                    ref_y = list(map(objective, ref_x))
                    LOGGER.info("ref_y is: %s", ref_y)

                # Pass user suggested initialisation points to the optimizer
                if ref_x:
                    if not isinstance(ref_y,
                                      (collections.abc.Iterable, numbers.Number)):
                        raise ValueError(
                            "`ref_y` should be an iterable or a scalar, "
                            "got %s" % type(ref_y))
                    if len(ref_x) != len(ref_y):
                        raise ValueError("`ref_x` and `ref_y` should "
                                         "have the same length")
                    LOGGER.info("ref_x: %s", ref_x)
                    LOGGER.info("ref_y: %s", ref_y)
                    n_calls -= len(ref_y)
                    ret = optimizer.tell(ref_x, ref_y)

                for i in range(n_calls):
                    next_x = optimizer.ask()
                    LOGGER.info("next_x: %s", next_x)
                    LOGGER.info("Running performance evaluation.......")
                    next_y = objective(next_x)
                    LOGGER.info("next_y: %s", next_y)
                    ret = optimizer.tell(next_x, next_y)
                    LOGGER.info("finish (ref_x, ref_y) tell")

            elif self.engine == 'abtest':
                abtuning_manager = ABtestTuningManager(self.knobs,
                                                       self.child_conn,
                                                       self.split_count)
                options, performance = abtuning_manager.do_abtest_tuning_abtest(
                )
                params = abtuning_manager.get_best_params()
                # convert string option into index
                options = abtuning_manager.get_options_index(options)
            elif self.engine == 'gridsearch':
                num_done = 0
                if self.y_ref is not None:
                    num_done = len(self.y_ref)
                gstuning_manager = GridSearchTuningManager(
                    self.knobs, self.child_conn)
                options, performance = gstuning_manager.do_gridsearch(num_done)
                params, labels = gstuning_manager.get_best_params()
                # convert string option into index
                options = gstuning_manager.get_options_index(options)
            elif self.engine == 'lhs':
                from analysis.optimizer.knob_sampling_manager import KnobSamplingManager
                knobsampling_manager = KnobSamplingManager(
                    self.knobs, self.child_conn, self.max_eval,
                    self.split_count)
                options = knobsampling_manager.get_knob_samples()
                performance = knobsampling_manager.do_knob_sampling_test(
                    options)
                params = knobsampling_manager.get_best_params(
                    options, performance)
                options = knobsampling_manager.get_options_index(options)
            elif self.engine == 'tpe':
                from analysis.optimizer.tpe_optimizer import TPEOptimizer
                tpe_opt = TPEOptimizer(self.knobs, self.child_conn,
                                       self.max_eval)
                best_params = tpe_opt.tpe_minimize_tuning()
                final_param = {}
                final_param["finished"] = True
                final_param["param"] = best_params
                self.child_conn.send(final_param)
                return best_params
            elif self.engine == 'traverse':
                from analysis.optimizer.knob_traverse_manager import KnobTraverseManager
                default_values = [
                    p_nob['ref'] for _, p_nob in enumerate(self.knobs)
                ]
                knobtraverse_manager = KnobTraverseManager(
                    self.knobs, self.child_conn, default_values)
                traverse_list = knobtraverse_manager.get_traverse_list()
                performance = knobtraverse_manager.get_traverse_performance(
                    traverse_list)
                rank = knobtraverse_manager.get_traverse_rank(performance)
                final_param = {
                    "rank": rank,
                    "param": knobtraverse_manager.get_default_values(),
                    "finished": True
                }
                self.child_conn.send(final_param)
                return final_param["param"]

            LOGGER.info("Minimization procedure has been completed.")
        except ValueError as value_error:
            LOGGER.error('Value Error: %s', repr(value_error))
            self.child_conn.send(value_error)
            return None
        except RuntimeError as runtime_error:
            LOGGER.error('Runtime Error: %s', repr(runtime_error))
            self.child_conn.send(runtime_error)
            return None
        except Exception as err:
            LOGGER.error('Unexpected Error: %s', repr(err))
            self.child_conn.send(Exception("Unexpected Error:", repr(err)))
            return None

        for i, knob in enumerate(self.knobs):
            if estimator is not None:
                params[knob['name']] = ret.x[i]
            if self.engine != 'gridsearch':
                labels.append(knob['name'])

        LOGGER.info("Optimized result: %s", params)
        LOGGER.info("The optimized profile has been generated.")
        final_param = {}
        if self.sel_feature is True:
            if self.feature_selector == "wefs":
                wefs = WeightedEnsembleFeatureSelector()
                rank = wefs.get_ensemble_feature_importance(
                    options, performance, labels)
            elif self.feature_selector == "vrfs":
                vrfs = VarianceReductionFeatureSelector()
                rank = vrfs.get_ensemble_feature_importance(
                    options, performance, labels)
            final_param["rank"] = rank
            LOGGER.info(
                "The feature importances of current evaluation are: %s", rank)

        final_param["param"] = params
        final_param["finished"] = True
        self.child_conn.send(final_param)
        return params
Example #17
def initialize_data(
    parameter_ranges: Sequence[Union[Sequence, Dimension]],
    data_path: Optional[str] = None,
    intermediate_data_path: Optional[str] = None,
    resume: bool = True,
) -> Tuple[list, list, list, int, int, np.ndarray, list]:
    """Initialize data structures needed for tuning. Either empty or resumed from disk.

    Parameters
    ----------
    parameter_ranges : Sequence of Dimension objects or tuples
        Parameter range specifications as expected by scikit-optimize.
    data_path : str or None, default=None
        Path to the file containing the data structures used for resuming.
        If None, no resuming will be performed.
    intermediate_data_path : str or None, default=None
        Path to the file containing the data structures used for resuming an unfinished experiment.
        If None, no resuming will be performed.
    resume : bool, default=True
        If True, fill the data structures with the data from the given data_path.
        Otherwise return empty data structures.

    Returns
    -------
    tuple of (X, y, noise, iteration, round, counts_array, point)
        Returns the initialized data structures X, y and noise, the iteration
        number, the resumed round number, the counts array and the current point.

    Raises
    ------
    ValueError
        If the number of specified parameters is not matching the existing number of
        parameters in the data.
    """
    logger = logging.getLogger()
    X = []
    y = []
    noise = []
    point = []
    iteration = 0
    round = 0
    counts_array = np.array([0, 0, 0, 0, 0])
    if data_path is not None and resume:
        space = normalize_dimensions(parameter_ranges)
        path = pathlib.Path(data_path)
        # Guard against a missing intermediate path (it is Optional):
        if intermediate_data_path is not None:
            intermediate_path = pathlib.Path(intermediate_data_path)
            if intermediate_path.exists():
                with np.load(intermediate_path) as importa:
                    round = importa["arr_0"]
                    counts_array = importa["arr_1"]
                    point = importa["arr_2"].tolist()
        if path.exists():
            with np.load(path) as importa:
                X = importa["arr_0"].tolist()
                y = importa["arr_1"].tolist()
                noise = importa["arr_2"].tolist()
            if len(X[0]) != space.n_dims:
                raise ValueError(
                    f"Number of parameters ({len(X[0])}) are not matching "
                    f"the number of dimensions ({space.n_dims})."
                )
            reduction_needed, X_reduced, y_reduced, noise_reduced = reduce_ranges(
                X, y, noise, space
            )
            if reduction_needed:
                backup_path = path.parent / (
                    path.stem + f"_backup_{int(time.time())}" + path.suffix
                )
                logger.warning(
                    f"The parameter ranges are smaller than the existing data. "
                    f"Some points will have to be discarded. "
                    f"The original {len(X)} data points will be saved to "
                    f"{backup_path}"
                )
                np.savez_compressed(
                    backup_path, np.array(X), np.array(y), np.array(noise)
                )
                X = X_reduced
                y = y_reduced
                noise = noise_reduced
            iteration = len(X)
    return X, y, noise, iteration, round, counts_array, point
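A hedged usage sketch for resuming from disk; the paths are hypothetical, and reduce_ranges must be importable from the surrounding project:

X, y, noise, iteration, round_, counts_array, point = initialize_data(
    parameter_ranges=[(0.0, 1.0), (1, 10)],
    data_path="data.npz",
    intermediate_data_path="data_intermediate.npz",
    resume=True,
)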
Example #18
def test_normalize_dimensions(dimension, name):
    space = normalize_dimensions([dimension])
    assert space.dimensions[0].name == name
Example #19
def test_normalize_dimensions(dimensions, normalizations):
    space = normalize_dimensions(dimensions)
    for dimension, normalization in zip(space, normalizations):
        assert dimension.transform_ == normalization