Example 1
import json

# Optimizer and the point-conversion helpers come from scikit-optimize;
# SklearnScorer, X, y, words and postfx are defined in the surrounding script.
from skopt import Optimizer
from skopt.utils import point_asdict, point_aslist


def job(loss):
    scorer = SklearnScorer(
        X, y, words, postfx,
        rules_apply=0.8,
        max_endings=75
    )

    space = {
        'alpha': (0.0001, 1.0, 'log-uniform'),
        'l1_ratio': (0.001, 0.999),
        'loss': [loss],
        'epsilon': (0.001, 10.0, 'log-uniform'),
        'threshold': (0.00001, 0.001, 'log-uniform'),
    }

    opt = Optimizer(point_aslist(space, space))
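    # NB: point_aslist(space, space) passes the dimension specs themselves in
    # sorted-key order, which is exactly what dimensions_aslist(space) returns.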

    for i in range(128):
        p = opt.ask()
        p = point_asdict(space, p)

        f = scorer(p)

        opt.tell(point_aslist(space, p), f)
        print(f)
        print(i, scorer.best_obj, scorer.best_params)

    with open(loss + '.json', 'w') as out:
        json.dump(scorer.result, out, indent=2, sort_keys=True)
Example 2
from numpy.testing import assert_equal

from skopt import Optimizer
from skopt.utils import dimensions_aslist, point_asdict, point_aslist


def test_dict_list_space_representation():
    """
    Tests whether the conversion of the dictionary and list representation
    of a point from a search space works properly.
    """

    chef_space = {
        'Cooking time': (0, 1200),  # in minutes
        'Main ingredient': [
            'cheese', 'cherimoya', 'chicken', 'chard', 'chocolate', 'chicory'
        ],
        'Secondary ingredient': [
            'love', 'passion', 'dedication'
        ],
        'Cooking temperature': (-273.16, 10000.0)  # in Celsius
    }

    opt = Optimizer(dimensions=dimensions_aslist(chef_space))
    point = opt.ask()

    # check if the back transformed point and original one are equivalent
    assert_equal(
        point,
        point_aslist(chef_space, point_asdict(chef_space, point))
    )
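
The round trip works because point_asdict and point_aslist both pair values
with the space's keys in sorted order. A minimal standalone sketch
(hypothetical two-dimensional space):

from skopt.utils import point_asdict, point_aslist

space = {'x': (0.0, 1.0), 'letter': ['a', 'b']}
point = ['a', 0.25]                    # values follow sorted keys: letter, x
as_dict = point_asdict(space, point)   # OrderedDict([('letter', 'a'), ('x', 0.25)])
assert point_aslist(space, as_dict) == point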
Example 3
    def _step(self, X, y, search_space, optimizer, groups=None, n_points=1):
        """Generate n_jobs parameters and evaluate them in parallel.
        """

        # get parameter values to evaluate
        params = optimizer.ask(n_points=n_points)

        # convert parameters to python native types
        # (np.asscalar was removed from recent numpy; .item() is the replacement)
        params = [[np.array(v).item() for v in p] for p in params]

        # make lists into dictionaries
        params_dict = [point_asdict(search_space, p) for p in params]

        # HACK: self.cv_results_ is reset at every call to _fit, keep current
        all_cv_results = self.cv_results_

        # HACK: this adds compatibility with different versions of sklearn
        refit = self.refit
        self.refit = False
        self._fit(X, y, groups, params_dict)
        self.refit = refit

        # merge existing and new cv_results_
        for k in self.cv_results_:
            all_cv_results[k].extend(self.cv_results_[k])

        self.cv_results_ = all_cv_results
        self.best_index_ = np.argmax(self.cv_results_['mean_test_score'])

        # feed the point and objective back into optimizer
        local_results = self.cv_results_['mean_test_score'][-len(params):]

        # optimizer minimizes objective, hence provide negative score
        return optimizer.tell(params, [-score for score in local_results])
Example 4
    def _step(self, X, y, search_space, optimizer, groups=None, n_points=1):
        """Generate n_jobs parameters and evaluate them in parallel.
        """
        if isinstance(n_points, np.int64):
            n_points = int(n_points)
            # NOTE: THIS IS THE CODE ADDED TO BayesSearchCV (see class docstr)

        # get parameter values to evaluate
        params = optimizer.ask(n_points=n_points)
        params_dict = [point_asdict(search_space, p) for p in params]

        # HACK: self.cv_results_ is reset at every call to _fit, keep current
        all_cv_results = self.cv_results_

        # HACK: this adds compatibility with different versions of sklearn
        refit = self.refit
        self.refit = False
        self._fit(X, y, groups, params_dict)
        self.refit = refit

        # merge existing and new cv_results_
        for k in self.cv_results_:
            all_cv_results[k].extend(self.cv_results_[k])

        self.cv_results_ = all_cv_results
        self.best_index_ = np.argmax(self.cv_results_['mean_test_score'])

        # feed the point and objective back into optimizer
        local_results = self.cv_results_['mean_test_score'][-len(params):]

        # optimizer minimizes objective, hence provide negative score
        return optimizer.tell(params, [-score for score in local_results])
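
The explicit int cast above matters because, in the skopt releases this code
targets, Optimizer.ask validates n_points with an isinstance(n_points, int)
check, which numpy integer types fail. A one-line illustration (np.int64
standing in for any numpy integer produced upstream, e.g. by np.argmax):

n_points = np.int64(4)    # numpy integer, rejected by the isinstance check
n_points = int(n_points)  # plain Python int, accepted by Optimizer.ask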
Example 5
    def __call__(self, x):
        x_dict = point_asdict(self.search_space, x)
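        # rescale the learning rate to log space before scoring; the FFNN
        # surrogate below is presumably trained on log-scaled learning rates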
        x_dict['model__lr'] = np.log(x_dict['model__lr'])
        x_list = point_aslist(self.search_space, x_dict)

        score = ffnn_predict([x_list], self.simulator)[0][0]

        return -np.mean(score)
Example 6
    def _step(
        self,
        optimizer: Optimizer,
        data: Dataset,
        metrics: List[str],
        cv: Any,
        n_jobs: int,
        verbose: int,
    ) -> Result:
        """
        Performs one step of the Bayesian optimization loop.

        Parameters
        ----------
        optimizer: Optimizer
            An instance of skopt's Optimizer

        data: Dataset
           Instance of data to train on

        metrics: List of str
            List of metrics to calculate results for

        cv: Any
            Either a CV object from sklearn or an int to specify number of folds

        n_jobs
            Number of jobs to calculate in parallel

        verbose
            Verbosity level of the method

        Returns
        -------
        Result
        """
        params = optimizer.ask()
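        # convert parameter values to python native types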
        params = [np.array(p).item() for p in params]

        # make lists into dictionaries
        params_dict = point_asdict(self.param_grid, params)
        estimator = clone(self.estimator).set_params(**params_dict)
        logger.info("Fitting estimator...")
        logger.debug("Fitting estimator %s", estimator)

        result = Result.from_estimator(
            estimator=estimator,
            data=data,
            metrics=metrics,
            cv=cv,
            n_jobs=n_jobs,
            verbose=verbose,
        )
        logger.info("Result: %s", result)
        optimizer.tell([params], [-result.metrics[0].score])
        return result
Example 7
    def _step(self, X, y, search_space, optimizer, groups=None, n_points=1):
        """Generate n_jobs parameters and evaluate them in parallel."""

        # get parameter values to evaluate
        # TODO: Until n_points is supported, we will wrap the return value in a list
        params = [optimizer.ask(n_points=n_points)]

        # convert parameters to python native types
        params = [[np.array(v).item() for v in p] for p in params]

        # make lists into dictionaries
        params_dict = [point_asdict(search_space, p) for p in params]

        # HACK: self.cv_results_ is reset at every call to _fit, keep current
        all_cv_results = self.cv_results_

        # HACK: this adds compatibility with different versions of sklearn
        refit = self.refit
        self.refit = False
        self._fit(X, y, groups, params_dict)
        self.refit = refit

        # merge existing and new cv_results_
        for k in self.cv_results_:
            all_cv_results[k].extend(self.cv_results_[k])

        all_cv_results["rank_test_score"] = list(
            np.asarray(
                rankdata(-np.array(all_cv_results["mean_test_score"]),
                         method="min"),
                dtype=np.int32,
            ))
        if self.return_train_score:
            all_cv_results["rank_train_score"] = list(
                np.asarray(
                    rankdata(-np.array(all_cv_results["mean_train_score"]),
                             method="min"),
                    dtype=np.int32,
                ))
        self.cv_results_ = all_cv_results
        self.best_index_ = np.argmax(self.cv_results_["mean_test_score"])

        # feed the point and objective back into optimizer
        local_results = self.cv_results_["mean_test_score"][-len(params):]

        # optimizer minimizes objective, hence provide negative score
        return optimizer.tell(
            params,
            [-score for score in local_results],
            n_samples=self.n_samples,
            gp_samples=self.gp_samples,
            gp_burnin=self.gp_burnin,
            progress=False,
        )
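
The rank columns are recomputed over the merged history because ranks are
relative to every result seen so far, and with method="min" tied scores share
the lowest rank. A quick check of that rankdata call in isolation:

import numpy as np
from scipy.stats import rankdata

scores = [0.7, 0.9, 0.7]
print(rankdata(-np.array(scores), method="min"))  # -> [2. 1. 2.]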
Example 8
    def __call__(self, x):
        model = Pipeline([
            ('features', ColumnSubset()),
            ('scaler', StandardScaler()),
            ('model', GradientBoostingRegressor()),
        ])
        x_dict = point_asdict(self.search_space, x)
        model.set_params(**x_dict)
        X, y = self.dataset
        scores = cross_val_score(model, X, y)
        return -np.mean(scores)
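
A minimal sketch of how a callable objective like this is typically driven
with skopt's ask/tell loop (hypothetical search space keyed by the pipeline's
parameter names; the dummy objective stands in for the cross-validated score):

from skopt import Optimizer
from skopt.utils import dimensions_aslist, point_asdict

search_space = {
    'model__learning_rate': (0.01, 0.3),
    'model__n_estimators': (50, 500),
}

def objective(x):
    params = point_asdict(search_space, x)
    return (params['model__learning_rate'] - 0.1) ** 2  # dummy loss

opt = Optimizer(dimensions_aslist(search_space))
for _ in range(10):
    x = opt.ask()
    opt.tell(x, objective(x))
print(min(opt.yi))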
Example 9
    def best_params_(self):
        check_is_fitted(self, "cv_results_")
        if self.return_policy == "best_setting" or len(self.optimizers_) > 1:
            if self.return_policy == "best_mean" and len(self.optimizers_) > 1:
                logging.warning(
                    "Return policy 'best_mean' is incompatible with multiple "
                    "search spaces. Reverting to 'best_setting'."
                )
            return self.cv_results_["params"][self.best_index_]
        if self.return_policy == "best_mean":
            random_state = self.optimizer_kwargs_["random_state"]
            # We construct a result object manually here, since skopt versions
            # up to 0.7.4 did not save it yet:
            opt = self.optimizers_[0]
            result_object = create_result(
                opt.Xi, opt.yi, space=opt.space, rng=random_state, models=[opt.gp]
            )
            point, _ = expected_minimum(
                res=result_object, n_random_starts=100, random_state=random_state,
            )
            return point_asdict(self.search_spaces, point)
Example 10
    def values_to_dict(self, values):
        return point_asdict(self.search_space, values)
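
Assuming a space such as self.search_space = {'alpha': (0.0, 1.0),
'kind': ['a', 'b']} (hypothetical, for illustration), values_to_dict([0.3, 'b'])
returns OrderedDict([('alpha', 0.3), ('kind', 'b')]): entries are matched to
the space's keys in sorted order.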
Example 11
print(search_result.fun)

#############################################################################

for fitness, x in sorted(zip(search_result.func_vals, search_result.x_iters)):
    print(fitness, x)

#############################################################################

space = search_result.space

print(search_result.x_iters)

search_space = {name: space[name][1] for name in space.dimension_names}

print(point_asdict(search_space, default_parameters))
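# NB: point_asdict pairs the flat default_parameters list with the search-space
# keys in sorted order, so the list must follow that ordering.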

#############################################################################
print("Plotting now ...")

_ = plot_histogram(result=search_result, dimension_identifier='learning_rate',
                   bins=20)
plt.show()

#############################################################################
_ = plot_objective_2D(result=search_result,
                      dimension_identifier1='learning_rate',
                      dimension_identifier2='num_dense_nodes')
plt.show()

#############################################################################