Example #1
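# Runs BO on a homogeneous search space of the requested variable type
# ('r' real, 'b' bool, 'i' integer, 'c' categorical/discrete, 'o' ordinal,
# 's' subset): a Gaussian process surrogate for the real case, a random
# forest for everything else.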
def test_homogenous(var_type):
    dim = 5

    def fitness(_):
        return np.random.rand()

    if var_type == "r":
        lb, ub = -1, 5
        space = RealSpace([lb, ub]) * dim
        mean = trend.constant_trend(dim, beta=None)
        thetaL = 1e-10 * (ub - lb) * np.ones(dim)
        thetaU = 10 * (ub - lb) * np.ones(dim)
        theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

        model = GaussianProcess(
            mean=mean,
            corr="squared_exponential",
            theta0=theta0,
            thetaL=thetaL,
            thetaU=thetaU,
            nugget=0,
            noise_estim=False,
            optimizer="BFGS",
            wait_iter=3,
            random_start=dim,
            likelihood="concentrated",
            eval_budget=100 * dim,
        )
    else:
        if var_type == "b":
            space = BoolSpace() * dim
        elif var_type == "i":
            space = IntegerSpace([0, 10], step=1) * dim
        elif var_type == "c":
            space = DiscreteSpace(list(range(10))) * dim
        elif var_type == "o":
            space = OrdinalSpace(list(string.ascii_lowercase))
        elif var_type == "s":
            space = SubsetSpace(list(string.ascii_lowercase)[:5])
        model = RandomForest(levels=space.levels)

    opt = BO(
        search_space=space,
        obj_fun=fitness,
        model=model,
        DoE_size=5,
        max_FEs=10,
        verbose=True,
        n_point=1,
    )
    print(opt.run())
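Example #2
# obj_fun and the equality constraint h are module-level helpers from the
# original test file (not shown here).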
def test_BO_equality():
    dim = 2
    search_space = RealSpace([0, 1]) * dim
    thetaL = 1e-5 * np.ones(dim)
    thetaU = np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL
    model = GaussianProcess(
        corr="squared_exponential",
        theta0=theta0,
        thetaL=thetaL,
        thetaU=thetaU,
        nugget=1e-1,
        random_state=42,
    )
    xopt, _, __ = BO(
        search_space=search_space,
        obj_fun=obj_fun,
        eq_fun=h,
        model=model,
        max_FEs=20,
        DoE_size=3,
        acquisition_fun="MGFI",
        acquisition_par={"t": 2},
        acquisition_optimization={"optimizer": "BFGS"},
        verbose=True,
        random_seed=42,
    ).run()
    assert np.isclose(h(xopt), 0, atol=1e-1)
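Example #3
# obj_fun2 and the inequality constraint g are module-level helpers from the
# original test file (not shown here).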
def test_BO_constraints():
    search_space = (
        IntegerSpace([1, 10], var_name="mu")
        + IntegerSpace([1, 10], var_name="lambda")
        + RealSpace([0, 1], var_name="pc")
        + RealSpace([0.005, 0.5], var_name="p")
    )
    model = RandomForest(levels=search_space.levels)
    xopt, _, __ = BO(
        search_space=search_space,
        obj_fun=obj_fun2,
        ineq_fun=g,
        model=model,
        max_FEs=10,
        DoE_size=3,
        eval_type="dict",
        acquisition_fun="MGFI",
        acquisition_par={"t": 2},
        n_job=1,
        n_point=1,
        verbose=True,
        random_seed=42,
    ).run()
    assert isinstance(xopt, dict)
    assert all(np.array(g(xopt)) <= 0)
Example #4
def test_BO(dim, obj_fun, ftarget, max_FEs, lb, ub, logfile):

    space = RealSpace(list(zip(lb, ub)))
    mean = trend.constant_trend(dim, beta=None)  # equivalent to Ordinary Kriging
    thetaL = 1e-10 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(
        mean=mean,
        corr="matern",
        theta0=theta0,
        thetaL=thetaL,
        thetaU=thetaU,
        noise_estim=False,
        nugget=1e-6,
        optimizer="BFGS",
        wait_iter=5,
        random_start=5 * dim,
        likelihood="concentrated",
        eval_budget=100 * dim,
    )

    return BO(
        search_space=space,
        obj_fun=obj_fun,
        model=model,
        DoE_size=dim * 5,
        max_FEs=max_FEs,
        verbose=False,
        n_point=1,
        minimize=True,
        ftarget=ftarget,
        logger=logfile,
    )
Example #5
def test_BO(dim, obj_fun, ftarget, max_FEs, lb, ub, logfile):
    sys.path.insert(0, '../')
    from bayes_optim import AnnealingBO, BO, ContinuousSpace
    from bayes_optim.Surrogate import GaussianProcess, trend

    space = ContinuousSpace(list(zip(lb, ub)))
    mean = trend.constant_trend(dim,
                                beta=None)  # equivalent to Ordinary Kriging
    thetaL = 1e-10 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(mean=mean,
                            corr='matern',
                            theta0=theta0,
                            thetaL=thetaL,
                            thetaU=thetaU,
                            noise_estim=False,
                            nugget=1e-6,
                            optimizer='BFGS',
                            wait_iter=5,
                            random_start=5 * dim,
                            likelihood='concentrated',
                            eval_budget=100 * dim)

    return BO(search_space=space,
              obj_fun=obj_fun,
              model=model,
              DoE_size=dim * 5,
              max_FEs=max_FEs,
              verbose=False,
              n_point=1,
              minimize=True,
              ftarget=ftarget,
              logger=logfile)
Example #6
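# warm_data seeds the optimizer with 10 pre-evaluated points, so after 10
# more evaluations (max_FEs=10) the data set holds 20 rows.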
def test_warm_data_with_RF():
    space = ContinuousSpace([-10, 10]) * 2 + \
        OrdinalSpace([5, 15]) + \
        NominalSpace(['OK', 'A', 'B', 'C', 'D', 'E', 'F', 'G'])

    X = space.sampling(10)
    y = [obj_fun(x) for x in X]

    model = RandomForest(levels=space.levels)
    opt = BO(search_space=space,
             obj_fun=obj_fun,
             model=model,
             minimize=True,
             eval_type='list',
             max_FEs=10,
             verbose=True,
             acquisition_fun='EI',
             warm_data=(X, y))
    opt.run()
    assert opt.data.shape[0] == 20
Example #7
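# The constraint x[0] + 5.1 <= 0 requires x[0] <= -5.1, which lies outside
# the box [-5, 5]: no feasible candidate can be proposed, so run() raises
# AskEmptyError.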
def test_infeasible_constraints():
    dim = 5
    lb, ub = -5, 5

    def fitness(_):
        return 1

    space = RealSpace([lb, ub]) * dim
    model = RandomForest(levels=space.levels)
    opt = BO(
        search_space=space,
        obj_fun=fitness,
        model=model,
        DoE_size=5,
        ineq_fun=lambda x: x[0] + 5.1,
        max_FEs=10,
        verbose=True,
        n_point=1,
    )
    with pytest.raises(AskEmptyError):
        opt.run()
Example #8
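# The same warm-start test against the newer API: 10 warm points plus
# max_FEs=5 evaluations leaves 15 rows in opt.data.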
def test_warm_data_with_RF():
    space = (RealSpace([-10, 10]) * 2 + IntegerSpace([5, 15]) +
             DiscreteSpace(["OK", "A", "B", "C", "D", "E", "F", "G"]))

    X = space.sample(10)
    y = [obj_fun(x) for x in X]

    model = RandomForest(levels=space.levels)
    opt = BO(
        search_space=space,
        obj_fun=obj_fun,
        model=model,
        minimize=True,
        eval_type="list",
        max_FEs=5,
        verbose=True,
        acquisition_fun="EI",
        warm_data=(X, y),
    )
    opt.run()
    assert opt.data.shape[0] == 15
Example #9
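# Warm data also works with a GP surrogate: the model is fitted on the warm
# points as soon as the optimizer is constructed, before run() is called.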
def test_warm_data_with_GPR():
    dim = 2
    lb, ub = -5, 5

    def fitness(x):
        x = np.asarray(x)
        return np.sum(x**2)

    X = np.random.rand(5, dim) * (ub - lb) + lb
    y = [fitness(x) for x in X]
    space = RealSpace([lb, ub]) * dim

    thetaL = 1e-10 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(
        theta0=theta0,
        thetaL=thetaL,
        thetaU=thetaU,
        nugget=0,
        noise_estim=False,
        optimizer="BFGS",
        wait_iter=3,
        random_start=dim,
        likelihood="concentrated",
        eval_budget=100 * dim,
    )
    opt = BO(
        search_space=space,
        obj_fun=fitness,
        model=model,
        warm_data=(X, y),
        max_FEs=10,
        verbose=True,
        n_point=1,
    )
    assert np.all(np.asarray(opt.data) == np.asarray(opt.warm_data))
    assert opt.model.is_fitted
    opt.run()
Example #10
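# A constant objective gives the surrogate nothing to model, so the run
# aborts with FlatFitnessError.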
def test_flat_continuous():
    dim = 5
    lb, ub = -1, 5

    def fitness(_):
        return 1

    space = RealSpace([lb, ub]) * dim

    mean = trend.constant_trend(dim, beta=None)
    thetaL = 1e-10 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(
        mean=mean,
        corr="squared_exponential",
        theta0=theta0,
        thetaL=thetaL,
        thetaU=thetaU,
        nugget=0,
        noise_estim=False,
        optimizer="BFGS",
        wait_iter=3,
        random_start=dim,
        likelihood="concentrated",
        eval_budget=100 * dim,
    )
    opt = BO(
        search_space=space,
        obj_fun=fitness,
        model=model,
        DoE_size=5,
        max_FEs=10,
        verbose=True,
        n_point=1,
    )
    with pytest.raises(FlatFitnessError):
        opt.run()
Example #11
def test_continuous():
    dim = 5
    lb, ub = -1, 5

    def fitness(x):
        x = np.asarray(x)
        return np.sum(x**2)

    space = ContinuousSpace([lb, ub]) * dim

    mean = trend.constant_trend(dim, beta=None)
    thetaL = 1e-10 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(mean=mean,
                            corr='squared_exponential',
                            theta0=theta0,
                            thetaL=thetaL,
                            thetaU=thetaU,
                            nugget=0,
                            noise_estim=False,
                            optimizer='BFGS',
                            wait_iter=3,
                            random_start=dim,
                            likelihood='concentrated',
                            eval_budget=100 * dim)

    opt = BO(search_space=space,
             obj_fun=fitness,
             model=model,
             DoE_size=5,
             max_FEs=10,
             verbose=True,
             n_point=1)
    print(opt.run())
Example #12
def test_BO(dim, obj_fun, ftarget, max_FEs, lb, ub, logfile):
    sys.path.insert(0, "../")
    from bayes_optim import BO, AnnealingBO, RealSpace
    from bayes_optim.Surrogate import GaussianProcess, trend

    space = RealSpace([lb, ub]) * dim

    mean = trend.constant_trend(dim, beta=0)  # beta fixed at 0 (simple Kriging); beta=None would estimate it (Ordinary Kriging)
    thetaL = 1e-10 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(
        mean=mean,
        corr="matern",
        theta0=theta0,
        thetaL=thetaL,
        thetaU=thetaU,
        noise_estim=False,
        nugget=1e-6,
        optimizer="BFGS",
        wait_iter=5,
        random_start=5 * dim,
        likelihood="concentrated",
        eval_budget=100 * dim,
    )

    return BO(
        search_space=space,
        obj_fun=obj_fun,
        model=model,
        DoE_size=dim * 5,
        max_FEs=max_FEs,
        verbose=False,
        n_point=1,
        minimize=True,
        ftarget=ftarget,
        logger=logfile,
    )
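Example #13
# The inequality constraint below squares np.array(x) for a mixed list that
# contains the strings '1'/'2'/'3'; the evaluation fails, and BO raises
# ConstraintEvaluationError.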
def test_BO_bad_constraints():
    search_space = (
        DiscreteSpace(["1", "2", "3"], var_name="lambda")
        + RealSpace([0, 1], var_name="pc")
        + RealSpace([0.005, 0.5], var_name="p")
    )
    model = RandomForest(levels=search_space.levels)
    with pytest.raises(ConstraintEvaluationError):
        BO(
            search_space=search_space,
            obj_fun=lambda x: 10 * (x[0] == "3") + x[1] * x[2],
            ineq_fun=lambda x: sum(np.array(x) ** 2),
            model=model,
            max_FEs=10,
            DoE_size=3,
            eval_type="list",
            acquisition_fun="MGFI",
            acquisition_par={"t": 2},
            n_job=1,
            n_point=1,
            verbose=True,
            random_seed=42,
        ).run()
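Example #14
# An older variant of the constrained test using the legacy OrdinalSpace /
# ContinuousSpace names; obj_func and the inequality constraint g are
# module-level helpers from the original test file (not shown here).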
def test_BO_constraints():
    search_space = OrdinalSpace([1, 10], var_name='mu') + \
        OrdinalSpace([1, 10], var_name='lambda') + \
        ContinuousSpace([0, 1], var_name='pc') + \
        ContinuousSpace([0.005, 0.5], var_name='p')

    model = RandomForest(levels=search_space.levels)
    xopt, _, __ = BO(search_space=search_space,
                     obj_fun=obj_func,
                     ineq_fun=g,
                     model=model,
                     max_FEs=30,
                     DoE_size=3,
                     eval_type='dict',
                     acquisition_fun='MGFI',
                     acquisition_par={'t': 2},
                     n_job=1,
                     n_point=1,
                     verbose=True).run()

    assert isinstance(xopt, dict)
    assert all(np.array(g(xopt)) <= 0)
Example #15
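# save()/load() round-trips the optimizer state to disk, after which the
# restored instance can resume run().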
def test_pickling():
    dim = 5
    lb, ub = -1, 5

    def fitness(x):
        x = np.asarray(x)
        return np.sum(x**2)

    space = RealSpace([lb, ub]) * dim

    mean = trend.constant_trend(dim, beta=None)
    thetaL = 1e-10 * (ub - lb) * np.ones(dim)
    thetaU = 10 * (ub - lb) * np.ones(dim)
    theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

    model = GaussianProcess(
        mean=mean,
        corr="squared_exponential",
        theta0=theta0,
        thetaL=thetaL,
        thetaU=thetaU,
        nugget=0,
        noise_estim=False,
        optimizer="BFGS",
        wait_iter=3,
        random_start=dim,
        likelihood="concentrated",
        eval_budget=100 * dim,
    )
    opt = BO(
        search_space=space,
        obj_fun=fitness,
        model=model,
        DoE_size=5,
        max_FEs=10,
        verbose=True,
        n_point=1,
        log_file="log",
    )
    opt.save("test")
    opt = BO.load("test")

    print(opt.run())

    os.remove("test")
    os.remove("log")

    opt = ParallelBO(
        search_space=space,
        obj_fun=fitness,
        model=model,
        DoE_size=5,
        max_FEs=10,
        verbose=True,
        n_point=3,
        log_file="log",
    )
    opt.save("test")
    opt = BO.load("test")
    print(opt.run())

    os.remove("test")
    os.remove("log")
Example #16
def modeling(train, targets, to_optimize, **kwargs):
    """
    Training and performing hyperparmeter optimization
    by Bayesian Optimization. Currently only supporting
    Random Forests.
    TODO: Make the HO and train_seting more interactive

    :param to_optimize: perform or not HO (boolean)
    :param train: train set (pandas)
    :param targets: targets (labels) (np.arrays)
    :cv: CV count for hyperparameter optimization
    :to_drop: Features to be dropped from learning such as unit numbers,
     cycles, etc (list of string names)
    :DoE_size: Initial design of experiment for the BO HO.
    :max_FEs: maximum number of function evaluations of the BO HO
    :features_list= a list of features to use, cv=
    :return: trained model and list of used features
    """

    start = time.time()
    features_list = kwargs.get('features_list', None)
    to_drop = kwargs.get('to_drop', None)
    cv = kwargs.get('cv', 10)
    DoE_size = kwargs.get('DoE_size', 200)
    max_FEs = kwargs.get('max_FEs', 20)

    train_set = train.copy()
    if to_drop:
        print(f'The following features will not be used in training: {to_drop}')
        train_set.drop(to_drop, axis=1, inplace=True)

    if features_list:
        print('Features selected by user')
        train_set = train_set[features_list]
        train_set = train_set.values

    else:
        print('Feature Selection (this will take a while...)')
        train_set, features_list = boruta_feature_selection(train_set, targets)

        with open('./features_list.pkl', 'wb') as f:
            pkl.dump(features_list, f)

    df_columns = ['acc', 'max_depth', 'n_estimators', 'bootstrap', 'max_features', 'min_samples_leaf',
                  'min_samples_split']

    df_eval = pd.DataFrame(columns=df_columns)

    # Hyperparameter optimization
    # objective function
    def obj_func(x):

        # logger.info('Started internal cross-validation')
        nonlocal df_eval

        performance_ = []

        skf = StratifiedKFold(n_splits=cv, shuffle=True)  # random_state must be an int or RandomState instance, not the np.random module
        for train_set_index, test_index in tqdm(skf.split(train_set, targets), 'Optimizing HO'):
            X_train_set, X_test = train_set[train_set_index], train_set[test_index]
            y_train_set, y_test = targets[train_set_index], targets[test_index]

            rf_ = RandomForestClassifier(n_estimators=int(x[1]), max_depth=int(x[0]),
                                         bootstrap=(x[2] == 'True'),  # bootstrap is encoded as a string in the search space
                                         max_features=x[3], min_samples_leaf=x[4],
                                         min_samples_split=x[5], n_jobs=-1)

            rf_.fit(X_train_set, y_train_set)

            predictions_ = rf_.predict(X_test)

            performance_.append(accuracy_score(y_test, predictions_))

        val = np.mean(performance_)

        df_eval_tmp = pd.DataFrame([[val, x[0], x[1], x[2], x[3], x[4], x[5]]],
                                   columns=df_columns)
        df_eval = pd.concat([df_eval, df_eval_tmp], ignore_index=True)
        return val

    # definition of hyperparameter search space:
    max_depth = OrdinalSpace([2, 100])
    n_estimators = OrdinalSpace([1, 1000])
    min_samples_leaf = OrdinalSpace([1, 10])
    min_samples_split = OrdinalSpace([2, 20])
    bootstrap = NominalSpace(['True', 'False'])
    max_features = NominalSpace(['auto', 'sqrt', 'log2'])

    search_space = max_depth + n_estimators + bootstrap + max_features + min_samples_leaf + min_samples_split
    model = RandomForest(levels=search_space.levels)

    opt = BO(search_space=search_space, obj_fun=obj_func, model=model, max_FEs=max_FEs,
             DoE_size=DoE_size,
             n_point=1,
             n_job=1,
             minimize=False,
             verbose=False)

    if to_optimize:
        print(f'Hyperparameter optimization with {cv}-folds and {max_FEs} function evaluations')
        opt.run()
    best_params_ = df_eval[df_columns[1:]][df_eval['acc'] == df_eval['acc'].max()][:1].to_dict('records')

    # Training using the best parameters
    if to_optimize:
        rf = RandomForestClassifier(n_jobs=-1, **best_params_[0])
    else:
        rf = RandomForestClassifier(n_jobs=-1)
    rf.fit(train_set, targets)

    dump(rf, './rf_model.joblib')
    end = time.time()

    print(f'----Duration of training is {(end - start) / 60} minutes')

    return rf, features_list
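
# A minimal usage sketch (hypothetical data; assumes the module-level imports
# of the original file, e.g. numpy as np and pandas as pd):
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    train_df = pd.DataFrame(rng.random((100, 4)), columns=['f1', 'f2', 'f3', 'f4'])
    labels = rng.integers(0, 2, size=100)
    # Passing features_list explicitly skips the Boruta feature selection;
    # to_optimize=False skips the BO hyperparameter search.
    rf_model, used_features = modeling(train_df, labels, to_optimize=False,
                                       features_list=['f1', 'f2', 'f3', 'f4'])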
Example #17
# Discrete (nominal) variables can be specified as follows:
# No lb, ub... a list of categories instead
N = DiscreteSpace(["OK", "A", "B", "C", "D", "E", "F", "G"],
                  var_name="nominal")
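
# C and I are defined earlier in the original snippet; a plausible
# reconstruction (variable names and bounds are assumptions), with obj_fun
# likewise assumed to be a helper defined earlier:
C = RealSpace([-5, 5], var_name="real") * 2    # two continuous variables
I = IntegerSpace([5, 15], var_name="ordinal")  # one integer variable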

# The whole search space can be constructed:
search_space = C + I + N

# Bayesian optimization also uses a Surrogate model
# For mixed variable type, the random forest is typically used
model = RandomForest(levels=search_space.levels)

opt = BO(
    search_space=search_space,
    obj_fun=obj_fun,
    model=model,
    max_FEs=50,
    DoE_size=3,  # the initial DoE size
    eval_type="dict",
    acquisition_fun="MGFI",
    acquisition_par={"t": 2},
    n_job=1,  # number of processes
    n_point=1,  # number of the candidate solution proposed in each iteration
    verbose=True,  # turn this off, if you prefer no output
)
xopt, fopt, stop_dict = opt.run()

print("xopt: {}".format(xopt))
print("fopt: {}".format(fopt))
print("stop criteria: {}".format(stop_dict))
Example #18
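# dim, lb, ub, and fitness are defined earlier in the original snippet; the
# values below are assumptions consistent with the other examples:
dim = 5
lb, ub = -1, 5


def fitness(x):
    x = np.asarray(x)
    return np.sum(x**2)
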
space = RealSpace([lb, ub]) * dim
thetaL = 1e-10 * (ub - lb) * np.ones(dim)
thetaU = 10 * (ub - lb) * np.ones(dim)
theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

model = GaussianProcess(
    theta0=theta0,
    thetaL=thetaL,
    thetaU=thetaU,
    nugget=1e-3,
    noise_estim=True,
    optimizer="BFGS",
    wait_iter=3,
    random_start=dim,
    likelihood="concentrated",
    eval_budget=100 * dim,
)

opt = BO(
    search_space=space,
    obj_fun=fitness,
    model=model,
    DoE_size=5,
    max_FEs=20,
    verbose=True,
    n_point=1,
    acquisition_optimization={"optimizer": "OnePlusOne_Cholesky_CMA"},
)
print(opt.run())
Example #19
# The original snippet begins mid-function; the definitions below are
# reconstructed with assumed values consistent with the other examples:
dim = 5
lb, ub = -1, 5


def fitness(x):
    x = np.asarray(x)
    return np.sum(x**2)


space = ContinuousSpace([lb, ub]) * dim

mean = trend.constant_trend(dim, beta=None)
thetaL = 1e-10 * (ub - lb) * np.ones(dim)
thetaU = 10 * (ub - lb) * np.ones(dim)
theta0 = np.random.rand(dim) * (thetaU - thetaL) + thetaL

model = GaussianProcess(theta0=theta0,
                        thetaL=thetaL,
                        thetaU=thetaU,
                        nugget=0,
                        noise_estim=False,
                        optimizer='BFGS',
                        wait_iter=3,
                        random_start=dim,
                        likelihood='concentrated',
                        eval_budget=100 * dim)

opt = BO(search_space=space,
         obj_fun=fitness,
         model=model,
         DoE_size=5,
         max_FEs=50,
         verbose=True,
         n_point=1)
print(opt.run())