Example 1
def test_exhaust_initial_calls(base_estimator):
    # check a model is fitted and used to make suggestions after we added
    # at least n_initial_points via tell()
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=2,
                    acq_optimizer="sampling", random_state=1)

    x0 = opt.ask()  # random point
    x1 = opt.ask()  # random point
    assert x0 != x1
    # first call to tell()
    r1 = opt.tell(x1, 3.)
    assert len(r1.models) == 0
    x2 = opt.ask()  # random point
    assert x1 != x2
    # second call to tell()
    r2 = opt.tell(x2, 4.)
    if base_estimator.lower() == 'dummy':
        assert len(r2.models) == 0
    else:
        assert len(r2.models) == 1
    # this is the first non-random point
    x3 = opt.ask()
    assert x2 != x3
    x4 = opt.ask()
    r3 = opt.tell(x3, 1.)
    # no new information was added, so the suggestion should be the same,
    # unless we are using the dummy estimator, which forever returns random
    # points and never fits any models
    if base_estimator.lower() == 'dummy':
        assert x3 != x4
        assert len(r3.models) == 0
    else:
        assert x3 == x4
        assert len(r3.models) == 2
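The test above drives Optimizer purely through its ask/tell interface. As a standalone illustration of the same pattern outside a test harness (the "GP" estimator string and the toy objective below are arbitrary choices, not taken from the test):

import numpy as np
from skopt import Optimizer

opt = Optimizer([(-2.0, 2.0)], "GP", n_initial_points=2,
                acq_optimizer="sampling", random_state=1)

for _ in range(5):
    x = opt.ask()              # random for the first 2 points, model-based afterwards
    y = float(np.sin(3 * x[0]) + x[0] ** 2)
    result = opt.tell(x, y)    # returns an OptimizeResult with .models, .x_iters, .x

print(len(result.models), result.x)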
Example 2
def test_model_queue_size():
    # Check if model_queue_size limits the model queue size
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling", model_queue_size=2)

    opt.run(bench1, n_iter=3)
    # tell() computes the next point ready for the next call to ask(), so
    # after three iterations Xi holds three points, while model_queue_size=2
    # keeps only the two most recent models
    assert_equal(len(opt.models), 2)
    assert_equal(len(opt.Xi), 3)
    opt.ask()
    assert_equal(len(opt.models), 2)
    assert_equal(len(opt.Xi), 3)
    assert_equal(opt.ask(), opt.ask())
Example 3
def test_multiple_asks():
    # calling ask() multiple times without a tell() in between should
    # be a "no op"
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_random_starts=1,
                    acq_optimizer="sampling")

    opt.run(bench1, n_iter=3)
    # tell() computes the next point ready for the next call to ask()
    # hence there are three after three iterations
    assert_equal(len(opt.models), 3)
    assert_equal(len(opt.Xi), 3)
    opt.ask()
    assert_equal(len(opt.models), 3)
    assert_equal(len(opt.Xi), 3)
    assert_equal(opt.ask(), opt.ask())
Example 4
def test_dict_list_space_representation():
    """
    Tests whether the conversion of the dictionary and list representation
    of a point from a search space works properly.
    """

    chef_space = {
        'Cooking time': (0, 1200),  # in minutes
        'Main ingredient': [
            'cheese', 'cherimoya', 'chicken', 'chard', 'chocolate', 'chicory'
        ],
        'Secondary ingredient': [
            'love', 'passion', 'dedication'
        ],
        'Cooking temperature': (-273.16, 10000.0)  # in Celsius
    }

    opt = Optimizer(dimensions=dimensions_aslist(chef_space))
    point = opt.ask()

    # check if the back transformed point and original one are equivalent
    assert_equal(
        point,
        point_aslist(chef_space, point_asdict(chef_space, point))
    )
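The round trip works because point_asdict and point_aslist pair list positions with dimension names using the same, consistent key ordering. A small, hypothetical space (separate from chef_space above, with illustrative names and values) makes the correspondence concrete:

from skopt.utils import dimensions_aslist, point_asdict, point_aslist

space = {'ingredient': ['salt', 'pepper'], 'temperature': (0.0, 100.0)}
# list positions pair with the dimension names: 'ingredient', 'temperature'
point = ['pepper', 42.0]
as_dict = point_asdict(space, point)
assert point_aslist(space, as_dict) == point
assert len(dimensions_aslist(space)) == 2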
Example 5
def test_multiple_asks():
    # calling ask() multiple times without a tell() in between should
    # be a "no op"
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)], base_estimator, n_initial_points=1,
                    acq_optimizer="sampling")

    opt.run(bench1, n_iter=3)
    # tell() computes the next point ready for the next call to ask()
    # hence there are three after three iterations
    assert_equal(len(opt.models), 3)
    assert_equal(len(opt.Xi), 3)
    opt.ask()
    assert_equal(len(opt.models), 3)
    assert_equal(len(opt.Xi), 3)
    assert_equal(opt.ask(), opt.ask())
Example 6
def test_categorical_only2():
    from numpy import linalg
    from skopt.space import Categorical
    from skopt.learning import GaussianProcessRegressor
    space = [Categorical([1, 2, 3]), Categorical([4, 5, 6])]
    opt = Optimizer(space,
                    base_estimator=GaussianProcessRegressor(alpha=1e-7),
                    acq_optimizer='lbfgs',
                    n_initial_points=10,
                    n_jobs=2)

    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
    opt.tell(next_x, [linalg.norm(x) for x in next_x])
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
    opt.tell(next_x, [linalg.norm(x) for x in next_x])
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
Example 7
def test_acq_optimizer_with_time_api(base_estimator, acq_func):
    opt = Optimizer([(-2.0, 2.0),], base_estimator=base_estimator,
                    acq_func=acq_func,
                    acq_optimizer="sampling", n_initial_points=2)
    x1 = opt.ask()
    opt.tell(x1, (bench1(x1), 1.0))
    x2 = opt.ask()
    res = opt.tell(x2, (bench1(x2), 2.0))

    # x1 and x2 are random.
    assert_true(x1 != x2)

    assert_true(len(res.models) == 1)
    assert_array_equal(res.func_vals.shape, (2,))
    assert_array_equal(res.log_time.shape, (2,))

    # x3 = opt.ask()

    with pytest.raises(TypeError) as e:
        opt.tell(x2, bench1(x2))
Example 8
def test_acq_optimizer_with_time_api(base_estimator, acq_func):
    opt = Optimizer([(-2.0, 2.0),], base_estimator=base_estimator,
                    acq_func=acq_func,
                    acq_optimizer="sampling", n_initial_points=2)
    x1 = opt.ask()
    opt.tell(x1, (bench1(x1), 1.0))
    x2 = opt.ask()
    res = opt.tell(x2, (bench1(x2), 2.0))

    # x1 and x2 are random.
    assert x1 != x2

    assert len(res.models) == 1
    assert_array_equal(res.func_vals.shape, (2,))
    assert_array_equal(res.log_time.shape, (2,))

    # x3 = opt.ask()

    with pytest.raises(TypeError) as e:
        opt.tell(x2, bench1(x2))
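Both variants of this test pass (objective value, evaluation time) tuples to tell(), which is the calling convention for the "per second" acquisition functions ("EIps", "PIps"); with a plain acq_func such as "EI" a tuple would be rejected. A minimal standalone version of the same call pattern (the "GP" estimator and the toy objective are arbitrary choices):

from skopt import Optimizer

opt = Optimizer([(-2.0, 2.0)], "GP", acq_func="EIps",
                acq_optimizer="sampling", n_initial_points=2, random_state=0)

x = opt.ask()
res = opt.tell(x, (x[0] ** 2, 1.5))  # (objective value, evaluation time)
print(res.func_vals, res.log_time)   # values and times are stored separately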
Example 9
def test_categorical_only():
    from skopt.space import Categorical
    cat1 = Categorical([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
    cat2 = Categorical([2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

    opt = Optimizer([cat1, cat2])
    for n in range(15):
        x = opt.ask()
        res = opt.tell(x, 12 * n)
    assert len(res.x_iters) == 15
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4

    cat3 = Categorical(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11"])
    cat4 = Categorical(["2", "3", "4", "5", "6", "7", "8", "9", "10", "11"])

    opt = Optimizer([cat3, cat4])
    for n in range(15):
        x = opt.ask()
        res = opt.tell(x, 12 * n)
    assert len(res.x_iters) == 15
    next_x = opt.ask(n_points=4)
    assert len(next_x) == 4
Example 10
def test_defaults_are_equivalent():
    # check that the defaults of Optimizer reproduce the defaults of
    # gp_minimize
    space = [(-5., 10.), (0., 15.)]
    opt = Optimizer(space, random_state=1)

    for n in range(15):
        x = opt.ask()
        res_opt = opt.tell(x, branin(x))

    res_min = gp_minimize(branin, space, n_calls=15, random_state=1)

    assert res_min.space == res_opt.space
    # tolerate small differences in the points sampled
    assert np.allclose(res_min.x_iters, res_opt.x_iters, atol=1e-5)
    assert np.allclose(res_min.x, res_opt.x, atol=1e-5)
Example 11
def test_defaults_are_equivalent():
    # check that the defaults of Optimizer reproduce the defaults of
    # gp_minimize
    space = [(-5., 10.), (0., 15.)]
    #opt = Optimizer(space, 'ET', acq_func="EI", random_state=1)
    opt = Optimizer(space, random_state=1)

    for n in range(12):
        x = opt.ask()
        res_opt = opt.tell(x, branin(x))

    #res_min = forest_minimize(branin, space, n_calls=12, random_state=1)
    res_min = gp_minimize(branin, space, n_calls=12, random_state=1)

    assert res_min.space == res_opt.space
    # tolerate small differences in the points sampled
    assert np.allclose(res_min.x_iters, res_opt.x_iters)  #, atol=1e-5)
    assert np.allclose(res_min.x, res_opt.x)  #, atol=1e-5)

    res_opt2 = opt.get_result()
    assert np.allclose(res_min.x_iters, res_opt2.x_iters)  # , atol=1e-5)
    assert np.allclose(res_min.x, res_opt2.x)  # , atol=1e-5)
Example 12
    def crossval_optimize_params(self,
                                 opt_metric,
                                 dataset,
                                 cv=3,
                                 opt_evals=50,
                                 metrics=None,
                                 verbose=False,
                                 client=None,
                                 workers=1,
                                 timeout=100,
                                 push_data=False,
                                 data_check=True,
                                 **kwargs):
        """Find optimal hyperparameters for all models

        Args:
            opt_metric (modelgym.metrics.Metric): metric to optimize
            dataset (pandas.DataFrame or str or pathlib.Path): dataset as a
                DataFrame with a 'y' column, or a path to a CSV file that is
                read into one.
            cv (int or list of (XYCDataset, XYCDataset) tuples): number of
                cross-validation folds if int, otherwise the folds themselves.
            opt_evals (int): number of cross-validation evaluations
            metrics (list of modelgym.metrics.Metric, optional): additional
                metrics to evaluate
            verbose (bool): Enable verbose output.
            client (optional): remote evaluation client; if None, models are
                evaluated locally via cross-validation.
            data_check (bool): if True, verify that the dataset has no NA
                values and contains a 'y' column.
            **kwargs: ignored
        Note:
            if cv is an int, the dataset is split into cv parts for cross
            validation; otherwise the given cv folds are used.
        Returns:
            dict: the best evaluation result found for each model space.
        """

        for name, model_space in self.model_spaces.items():
            self.ind2names[name] = [param.name for param in model_space.space]

        if metrics is None:
            metrics = [opt_metric]

        metrics.append(opt_metric)

        if isinstance(dataset, Path) or isinstance(dataset, str):
            if Path(dataset).expanduser().exists():
                dataset = read_csv(dataset)
            else:
                raise FileNotFoundError(errno.ENOENT,
                                        os.strerror(errno.ENOENT), dataset)
        if isinstance(dataset, DataFrame):
            if data_check:
                if dataset.isnull().values.any():
                    raise ValueError("Dataset has NA values")
                if "y" not in list(dataset.columns):
                    raise ValueError("Dataset doesn't have 'y' column")
                logging.info("Dataset is ok")
        else:
            raise ValueError(
                "Dataset should be DataFrame or path to the DataFrame")

        data_path = ""
        if client is None:
            cv_pairs = cv_split(dataset, cv)
        else:
            with tempfile.NamedTemporaryFile() as temp:
                dataset.to_csv(temp.name, index=False)
                data_path = client.send_data(temp.name, push_data)

        for name, model_space in self.model_spaces.items():
            if client is None:
                fn = lambda params: self._eval_fn(
                    model_type=model_space.model_class,
                    params=params,
                    cv=cv_pairs,
                    metrics=metrics,
                    verbose=verbose,
                    space_name=name)

                best = self.optimizer(fn,
                                      model_space.space,
                                      n_calls=opt_evals,
                                      n_random_starts=min(1, opt_evals))
            else:
                optimizer = Optimizer(dimensions=model_space.space,
                                      random_state=1,
                                      acq_func="gp_hedge")
                for _ in log_progress(range(opt_evals), every=1):
                    x = optimizer.ask(
                        n_points=workers)  # x is a list of n_points points
                    x_named = []
                    for params in x:
                        x_named.append({
                            self.ind2names[name][i]: params[i]
                            for i in range(len(params))
                        })
                    job_id_list = []
                    for model_params in x_named:
                        model_info = {
                            "models": [{
                                "type": model_space.model_class.__name__,
                                "params": model_params
                            }],
                            "metrics": [m.name for m in metrics[1:]],
                            "return_models": False,
                            "cv": cv
                        }
                        job_id_list.append(
                            client.eval_model(model_info=model_info,
                                              data_path=data_path))
                    result_list = client.gather_results(job_id_list,
                                                        timeout=timeout)
                    if not result_list:
                        continue
                    y_succeed = [
                        result for result in result_list if result is not None
                    ]
                    x_succeed = [
                        x_dot for i, x_dot in enumerate(x)
                        if result_list[i] is not None
                    ]
                    self.logs += y_succeed
                    for res in y_succeed:
                        # track the lowest loss seen so far for this model
                        if self.best_results.get(name) is None:
                            self.best_results[name] = {
                                "output": {"loss": float("inf")}
                            }
                        if (res["output"]["loss"] <
                                self.best_results[name]["output"]["loss"]):
                            self.best_results[name] = res
                    if y_succeed:
                        best = optimizer.tell(x_succeed, [
                            res["output"]["loss"] for res in y_succeed
                        ])

        return self.best_results
Example 13
import numpy as np

from skopt import Optimizer
from skopt.learning import GaussianProcessRegressor
from skopt.learning.gaussian_process.kernels import Matern

# `black_box` (the objective) and `plot_space` (a plotting helper) are
# assumed to be defined elsewhere in the original example; a sketch of
# both follows after this snippet.

# Search from 0.0 to 6.0.
dimensions = ((0.0, 6.0), )

# Initialize the estimator: a GP surrogate with a Matern kernel, no noise.
gpr = GaussianProcessRegressor(kernel=Matern(), noise=0.0)
optimizer = Optimizer(dimensions=dimensions,
                      base_estimator=gpr,
                      n_random_starts=0,
                      acq_func="LCB",
                      random_state=0)

# Tell some points to the optimizer and ask for the next point.
X = np.reshape(np.linspace(5.0, 6.0, 10), (-1, 1)).tolist()
y = [black_box(xi) for xi in X]
optimizer.tell(X, y)
x_cand = optimizer.ask()
y_cand = black_box(x_cand)
plot = plot_space(X, y, optimizer.models[-1], x_cand)
plot.show()

# Tell and ask again.
optimizer.tell(x_cand, y_cand)
X = X + [x_cand]
y = y + [y_cand]
x_cand = optimizer.ask()
y_cand = black_box(x_cand)
plot = plot_space(X, y, optimizer.models[-1], x_cand)
plot.show()

# Tell and ask again.
optimizer.tell(x_cand, y_cand)
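The snippet above relies on two helpers that are defined elsewhere in the original example and are not part of scikit-optimize: black_box (the objective) and plot_space (a plotting routine). A rough sketch of what they could look like follows; the objective function, the plotting details, and the assumption that the surrogate was fitted on raw (unnormalized) coordinates are guesses, not the original code.

import numpy as np
import matplotlib.pyplot as plt


def black_box(x):
    # Hypothetical 1-D objective; the optimizer passes points as [value].
    x = np.ravel(x)[0]
    return np.sin(5 * x) * (1 - np.tanh(x ** 2))


def plot_space(X, y, model, x_cand):
    # Plot observations, the surrogate mean +/- one std, and the candidate.
    # Assumes the model was fitted on raw coordinates; newer skopt versions
    # may fit the GP in a normalized space, in which case the grid would
    # need to be transformed first.
    grid = np.linspace(0.0, 6.0, 200).reshape(-1, 1)
    mu, std = model.predict(grid, return_std=True)
    plt.figure()
    plt.plot(grid.ravel(), mu, label="surrogate mean")
    plt.fill_between(grid.ravel(), mu - std, mu + std, alpha=0.3)
    plt.scatter(np.ravel(X), y, c="k", zorder=3, label="observations")
    plt.axvline(np.ravel(x_cand)[0], color="r", ls="--", label="candidate")
    plt.xlabel("x")
    plt.ylabel("f(x)")
    plt.legend()
    return plt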