def test_explore_lazy():
    random_state = ensure_rng(0)
    xs = np.linspace(-2, 10, 1000)
    f = np.exp(-(xs - 2)**2) + np.exp(-(xs - 6)**2 / 10) + 1 / (xs**2 + 1)
    bo = BayesianOptimization(f=lambda x: f[int(x)],
                              pbounds={'x': (0, len(f) - 1)},
                              random_state=random_state,
                              verbose=0)
    bo.explore({'x': [f.argmin()]}, eager=False)
    assert len(bo.space) == 0
    assert len(bo.init_points) == 1

    # Note we currently expect lazy explore to override points
    # This may not be the case in the future.
    bo.explore({'x': [f.argmax()]}, eager=False)
    assert len(bo.space) == 0
    assert len(bo.init_points) == 1

    bo.maximize(init_points=0, n_iter=0, acq='ucb', kappa=5)

    res = bo.space.max_point()
    max_params = res['max_params']
    max_val = res['max_val']

    assert max_params['x'] == f.argmax()
    assert max_val == f.max()
Example n. 2
def main():
    # stdout_path = 'outcome_testBO.txt'
    # print '[INFO]  stdout_path:\t{}'.format(stdout_path)
    # sys.stdout = open(stdout_path, 'w')
    #
    # np.random.seed(1)
    print('#' * 53)
    scores = []
    sensis = []
    specis = []
    for i in range(10):
        trainset, testset = load_data(i + 1)
        X_train, y_train = trainset
        X_test, y_test = testset

        def svccv(C, tol):
            return cross_val_score(SVC(C=C, random_state=1, tol=tol),
                                   X_train, y_train, cv=9).mean()


        def rfccv(n_estimators, min_samples_split, max_features):
            return cross_val_score(RFC(n_estimators=int(n_estimators),
                                       min_samples_split=int(min_samples_split),
                                       max_features=min(max_features, 0.999),
                                       random_state=2),
                                   X_train, y_train, scoring='f1', cv=5).mean()

        svcBO = BayesianOptimization(svccv, {'C': (10, 50000), 'tol': (0.0001, 0.1)})
        svcBO.explore({'C': [10, 100, 1000, 10000, 20000, 50000], 'tol': [0.0001, 0.001, 0.005, 0.01, 0.05, 0.1]})

        # rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250),
        # 'min_samples_split': (2, 25),
        # 'max_features': (0.1, 0.999)})

        svcBO.maximize(init_points=50, restarts=200, n_iter=100)

        print('#' * 53)
        print('Final Results')
        print('SVC: %f' % svcBO.res['max']['max_val'])
        print('max_params: ', svcBO.res['max']['max_params'])

        params = svcBO.res['max']['max_params']
        clf = SVC(C=params['C'], random_state=1, tol=params['tol'])
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        result = clf.predict(X_test)
        sensi, speci = my_scores(y_test, result)
        print(1 - score, sensi, speci)
        # print 'err:', 1 - score

        scores.append(score)
        sensis.append(sensi)
        specis.append(speci)

    print(scores)
    print("accur:\t{}\tstd:\t{}".format(np.mean(scores), np.std(scores)))
    print("sensi:\t{}".format(np.mean(sensis)))
    print("speci:\t{}".format(np.mean(specis)))
Example n. 3
    def opti(self):

        bo = BayesianOptimization(self.trainAndCompareHit, {"x": (10, 50), "y": (0.1, 1.0)})

        bo.explore({"x": range(10, 50), "y": [0.1, 0.25, 0.5, 0.75, 1.0]})

        bo.initialize({-11: {"x": 20, "y": 0.5}})

        bo.maximize(init_points=5, n_iter=5, kappa=3.29)

        print(bo.res["max"])
Example n. 4
def main():
	bo = BayesianOptimization(lambda fr, sm, mo, ma, nm, de, co: play_game(fr, sm, mo, ma, nm, de, co),
						  {'fr': (2, 6), 'sm': (-1, 1), 'mo': (0, 2),  'ma': (0, 2), 'nm': (-1, 1), 'de': (-1, 1), 'co': (-1, 1)})

	bo.explore({'fr': [5.0771664428677061], 'sm': [-0.13059762676063172], 'mo': [1.3682148714919597],
		'ma': [0.52214706278657907], 'nm': [-0.86627512983565302], 'de': [0.42238952601950097], 'co': [-0.39416823224808289]})

	bo.maximize(init_points=5, n_iter=50, kappa=0.5)

	# The output values can be accessed with self.res
	print('RESULTS')
	print(bo.res['max'])
Example n. 5
def run(gpunum, cancer_type, feature_type, attempt):

    batch_size = 32
    epochs = 100

    os.environ["CUDA_VISIBLE_DEVICES"] = gpunum

    def get_session(gpu_fraction=1):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction, allow_growth=True)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    ktf.set_session(get_session())

    results = []

    def scoreofModel(cancer_type, feature_type, attempt):
        def inner_SoM(pca, dropout, ae_dim1, ae_dim2):
            hidden_dims = 512
            print("**scoreofModel pca " + str(pca) + " dropout " +
                  str(dropout) + " hidden dims " + str(hidden_dims) +
                  " dim1 " + str(ae_dim1) + " dim2 " + str(ae_dim2))
            print("ct %s ft %s attempt %d" %
                  (cancer_type, feature_type, attempt))

            hidden_dims = int(hidden_dims)
            ae_dim1 = int(ae_dim1)
            ae_dim2 = int(ae_dim2)

            # AE
            with open('../test_bong/data/overlap_%s.pkl' % (cancer_type),
                      'rb') as handle:
                labels = pickle.load(handle)
                x = pickle.load(handle)
                y = pickle.load(handle)
            x_trn, x_tst, c_trn, c_tst, s_trn, s_tst, l_trn, l_tst = \
                train_test_split(x, y[:, 0], y[:, 1], labels, test_size=80, random_state=7)
            if variables.mse_tag == "DIV":
                s_trn = np.divide(s_trn, 1000.0)
                s_tst = np.divide(s_tst, 1000.0)
            elif variables.mse_tag == "LOG":
                s_trn = np.log(s_trn)
                s_tst = np.log(s_tst)

            x_trn, x_tst = AE_again_read.AE_model_save(cancer_type,
                                                       feature_type, ae_dim1,
                                                       ae_dim2, x_trn, x_tst)
            clf = PCA(pca, whiten=True)
            x_trn = clf.fit_transform(x_trn)
            x_tst = clf.transform(x_tst)

            def ModelV1(model_input):
                z = Dropout(dropout)(model_input)
                z = Dense(hidden_dims, activation='relu')(z)
                z = Dropout(dropout)(z)
                z = Dense(hidden_dims, activation='relu')(z)
                model_output = Dense(1, activation=None)(z)
                model = Model(model_input, model_output)
                #model.compile(loss=my_cindex(c_tst, s_tst), optimizer='adam')#,metrics=["mse"])
                model.compile(loss="mse", optimizer='adam')
                return model

            def ModelV2(model_input):
                z = Dense(hidden_dims, activation="selu")(model_input)
                z = BatchNormalization()(z)
                z = Dropout(dropout)(z)
                model_output = Dense(1)(z)

                model = Model(model_input, model_output)
                learning_ratio = 0.001
                sgd = SGD(lr=learning_ratio,
                          decay=1e-5,
                          momentum=0.9,
                          nesterov=True)
                model.compile(loss='mean_squared_error',
                              optimizer=sgd,
                              metrics=['accuracy', 'mean_squared_error'])

                return model

            x_trn, x_dev, c_trn, c_dev, s_trn, s_dev, l_trn, l_dev = train_test_split(
                x_trn, c_trn, s_trn, l_trn, test_size=20, random_state=7)

            print("x_trn %s, x_dex %s, x_tst %s" %
                  (str(x_trn.shape), str(x_dev.shape), str(x_tst.shape)))

            feature_dim = x_trn.shape[1]
            input_shape = (feature_dim, )
            model_input = Input(shape=input_shape)

            if variables.model_type == "V1":
                model = ModelV1(model_input)
            else:
                model = ModelV2(model_input)

                if variables.train_with_censored == "EXCLUDE":
                    x_trn = x_trn[c_trn == 0]
                    x_dev = x_dev[c_dev == 0]
                    s_trn = s_trn[c_trn == 0]
                    s_dev = s_dev[c_dev == 0]
                    print("reduced to x_trn %s, x_dev %s" %
                          (str(x_trn.shape), str(x_dev.shape)))

            data = tuple((x_trn, c_trn, s_trn, x_dev, c_dev, s_dev, x_tst,
                          c_tst, s_tst))

            model.summary()
            model_filepath = '../model/%s-%s-%d-%s-%s-%d-%d-%d.model' % (
                cancer_type, feature_type, attempt, str(pca), str(dropout),
                hidden_dims, ae_dim1, ae_dim2)
            checkpoint = MyCallback(results,
                                    model_filepath,
                                    data,
                                    real_save=True,
                                    verbose=0,
                                    save_best_only=True,
                                    mode='auto',
                                    cancer_type=cancer_type,
                                    feature_type=feature_type,
                                    thr=pca,
                                    dropout_prob=dropout,
                                    dimension=hidden_dims,
                                    activate='relu',
                                    AE1=ae_dim1,
                                    AE2=ae_dim2)
            callbacks_list = [checkpoint]

            history = model.fit(x_trn,
                                s_trn,
                                batch_size=batch_size,
                                shuffle=True,
                                callbacks=callbacks_list,
                                epochs=epochs,
                                validation_data=(x_dev, s_dev))

            #print("-----History----")
            #print(history.history.keys())
            #print(history.history)
            #print(len(history.history['val_loss']))

            pred_tst = model.predict(x_tst)

            return my_cindex(c_tst, s_tst)(s_tst, pred_tst)

        return inner_SoM

    def frange(x, y, jump):
        while x < y:
            yield x
            x += jump

    result_cindex = scoreofModel(cancer_type, feature_type, 0)(0.9999, 0.0,
                                                               1400, 700)

    # results
    # pca 0.98 dropout 0.0 hidden dims 512 dim1 1441 dim2 226       0.496551724138   0.524384112619
    # pca 0.98 dropout 0.0 hidden dims 256 dim1 1441 dim2 226      0.442857142857   0.512820512821
    #
    #pca 0.98 dropout 0.8 hidden dims 10.0 dim1 442.372154389 dim2 700.0 bad
    #pca 0.996454803902 dropout 0.0812376887453 hidden dims 532.204282998 dim1 1498.73305566 dim2 697.318639345 bad

    # **scoreofModel pca 0.98 dropout 0.0 hidden dims 512 dim1 1441.27853285 dim2 226.547497983
    bo_dict = {
        "pca": (0.99, 0.9999),
        "dropout": (0, 0.8),
        # "hidden_dims" : (10, 1000),
        "ae_dim1": (1400, 1500),
        "ae_dim2": (500, 700)
    }

    #for k in bo_dict.keys() :
    #    print(k)
    #    print (bo_dict[k])

    #scoreofModel(**{'ae_dim1': 1138.0196836044008, 'dropout': 0.18242910081095307, 'pca': 0.98912275449631237, 'hidden_dims': 373.61768597111694, 'ae_dim2': 472.20225514485821})

    v1BO = BayesianOptimization(scoreofModel(cancer_type, feature_type,
                                             attempt),
                                bo_dict,
                                verbose=True)

    v1BO.explore({
        "pca": [0.99, 0.1, 0.9999],
        "dropout": [0, 0.2, 0.8],
        # "hidden_dims" : [10, 200, 1000],
        "ae_dim1": [1400, 100, 1500],
        "ae_dim2": [500, 100, 700],
    })

    gp_params = {"alpha": 1e-5}

    v1BO.maximize(init_points=5, n_iter=40)

    print('Final Results')
    #print('max %f' % v1BO.res['max']['max_val'])
    #print('***<max>****')
    #print(v1BO.res['max'])
    #print('***<all>***')
    #print(v1BO.res['all'])
    results.append(v1BO.res['all'])
    #print(results)
    print(v1BO.res)

    with open('./BO_Result_' + cancer_type + '.txt', 'at') as f:

        params = v1BO.res['all']['params']
        values = v1BO.res['all']['values']
        keys = params[0].keys()

        for i in range(2):
            line = [cancer_type, feature_type]

            for k in keys:
                line.append(str(params[i][k]))
            line.append(str(values[i]))
            f.write('\t'.join(line) + '\n')
Example n. 6
                            n_estimators=10000,
                            seed=9999)
generate_metrics(XGB, os_train_data, os_train_target, test_data, test_target)


SKDNN = MLPClassifier(solver='adam',
                      alpha=1e-5,
                      batch_size='auto',
                      hidden_layer_sizes=(30,40,50,60),
                      learning_rate='adaptive',
                      learning_rate_init = 1e-2)
generate_metrics(SKDNN, os_train_data, os_train_target, test_data, test_target)

#--- XGB ensemble ----------------------
negpos = 1.0*(len(target)-target.sum())/target.sum()
def xgbcv(learning_rate, n_estimators):
    return my_cross_val_score(xgboost.XGBClassifier(learning_rate=
                                                    10**learning_rate,
                                                    n_estimators=int(n_estimators),
                                                    #scale_pos_weight=negpos
                                                    seed=9999
                                                    ),
                              os_train_data, os_train_target, cv=10).mean()

xgbBO = BayesianOptimization(xgbcv, {'learning_rate': (-4, -1),
                                     'n_estimators': (100, 1000)})
xgbBO.explore({'learning_rate': [-6, -3.5, -1],
               'n_estimators': [100,500,1000]})
xgbBO.maximize(init_points=10, n_iter=40)
print('XGB: %f' % xgbBO.res['max']['max_val'])
Example n. 7
    gp_params = {"alpha": 1e-5}

    #SVM
    svcBO = BayesianOptimization(svccv,
        {'gamma': (0.00001, 0.1)})

    svcBO.maximize(init_points=3, n_iter=4, **gp_params)

    #Random Forest
    rfcBO = BayesianOptimization(
        rfccv,
        {'n_estimators': (10, 300),
         'max_depth': (2, 10)
        }
    )
    rfcBO.explore({'max_depth': [2, 4, 6], 'n_estimators': [64, 128, 256]})

    rfcBO.maximize(init_points=4, n_iter=4, **gp_params)

    print('Final Results')
    print('SVC: %f' % svcBO.res['max']['max_val'])
    print('RFC: %f' % rfcBO.res['max']['max_val'])
    
    #visualize results
    x = np.linspace(0.00001,0.1,64).reshape(-1,1)
    plot_gp(svcBO,x)
    plt.show()

    rfc_X = list(map(lambda x: round(x, 0), rfcBO.X[:, 0]))
    rfc_Y = list(map(lambda x: round(x, 0), rfcBO.X[:, 1]))
    data = pd.DataFrame(data={'n_est': rfc_X, 'max_depth': rfc_Y, 'score': rfcBO.Y})
Example n. 8
lgbBO = BayesianOptimization(
    lgb_cv, {
        'min_child_weight': (1, 20),
        'colsample_bytree': (0.1, 1),
        'max_depth': (5, 15),
        'subsample': (0.5, 1),
        'learning_rate': (0, 1),
        'reg_lambda': (0, 1.0),
        'n_estimators': (20, 200),
    })
lgbBO.explore({
    "min_child_weight": [2, 5],
    "colsample_bytree": [0.7, 0.8],
    "max_depth": [5, 10],
    "learning_rate": [0.095, 0.001],
    'subsample': [0.7, 0.6],
    "reg_lambda": [0, 0.001],
    'n_estimators': [100, 50]
})
lgbBO.maximize(init_points=5, n_iter=20)

# In[8]:

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss, roc_auc_score
import lightgbm as lgb


def train_and_validate_model(x_train, y_train, x_validation, y_validation,
                             cls):
                                   n_informative=12,
                                   n_redundant=7)

def svccv(C, gamma):
    return cross_val_score(SVC(C=C, gamma=gamma, random_state=2),
                           data, target, scoring='f1', cv=5).mean()

def rfccv(n_estimators, min_samples_split, max_features):
    return cross_val_score(RFC(n_estimators=int(n_estimators),
                               min_samples_split=int(min_samples_split),
                               max_features=min(max_features, 0.999),
                               random_state=2),
                           data, target, scoring='f1', cv=5).mean()

if __name__ == "__main__":

    svcBO = BayesianOptimization(svccv, {'C': (0.001, 100), 'gamma': (0.0001, 0.1)})
    svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]})

    rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250),
                                         'min_samples_split': (2, 25),
                                         'max_features': (0.1, 0.999)})
    svcBO.maximize()

    print('-'*53)
    rfcBO.maximize()

    print('-'*53)
    print('Final Results')
    print('SVC: %f' % svcBO.res['max']['max_val'])
    print('RFC: %f' % rfcBO.res['max']['max_val'])
Example n. 10
    def testReferenceImplementation3D(self):
        """ Check for numeric correctness against reference implementation
        """
        def f(x):  # vector version
            return np.exp(-(x[0] - 2)**2) + np.exp(-(x[0] - 6)**2 / 10) + \
                1 / (x[0]**2 + 1) + np.sin(x[1]) + 5 * np.cos(6.42 * x[2])

        def ff(x, y, z):  # variable version
            return np.exp(-(x - 2)**2) + np.exp(-(x - 6)**2 / 10) + \
                1 / (x**2 + 1) + np.sin(y) + 5 * np.cos(6.42 * z)

        def posterior(gp, x):
            mu, sigma = gp.predict(x, return_std=True)
            return mu, sigma

        bounds = np.array([[-5, 5]] * 3)

        # Generate training data
        np.random.seed(6)
        X = np.random.uniform(bounds[:, 0],
                              bounds[:, 1],
                              size=(1000, bounds.shape[0]))
        w = [f(x) for x in X]

        np.random.seed(6)
        X_train = np.random.uniform(bounds[:, 0],
                                    bounds[:, 1],
                                    size=(3, bounds.shape[0]))
        y_train = [f(x) for x in X_train]
        rand_seed = 0

        gp_params = {
            "alpha": 1e-5,
            "n_restarts_optimizer": 25,
            "kernel": Matern(nu=2.5),
            "random_state": rand_seed
        }

        # Reference implementation
        optimizer = BO_ref(ff, {
            'x': (-5, 5),
            'y': (-5, 5),
            'z': (-5, 5)
        },
                           verbose=0)
        # append training data
        optimizer.explore({
            'x': X_train[:, 0],
            'y': X_train[:, 1],
            'z': X_train[:, 2]
        })
        # fit gaussian process regressor
        optimizer.maximize(init_points=0,
                           n_iter=0,
                           acq='ei',
                           xi=1e-4,
                           **gp_params)
        # get results
        post = np.array([posterior(optimizer.gp, x.reshape(1, -1)) for x in X])
        mu_ref, std_ref = post[:, 0], post[:, 1]

        utility_ref = optimizer.util.utility(X, optimizer.gp,
                                             optimizer.Y.max())

        # Testing implementation
        gp = get_fitted_gaussian_processor(np.array(X_train),
                                           np.array(y_train),
                                           None,
                                           standardize_y=False,
                                           **gp_params)
        util = UtilityFunction(kind='ei', gp_objective=gp, xi=1e-4)

        post_impl = np.array([posterior(gp, x.reshape(1, -1)) for x in X])
        mu_impl, std_impl = post_impl[:, 0], post_impl[:, 1]
        utility_impl = util.utility(X)

        assert (mu_ref == mu_impl).all(),\
            "mu(x) comparison failed"
        assert (std_ref == std_impl).all(),\
            "std(x) comparison failed"
        assert (utility_ref == utility_impl).all(),\
            "utility(x) comparison failed"
Example n. 11

dnnBO = BayesianOptimization(
    dnncv, {
        'h1': (10, 100),
        'h2': (10, 100),
        'h3': (10, 100),
        'h4': (10, 100),
        'learning_rate': (1e-4, 1e-1),
        'dropout': (0.1, 0.9),
    })

dnnBO.explore({
    'h1': [20, 50],
    'h2': [20, 50],
    'h3': [20, 50],
    'h4': [20, 50],
    'learning_rate': [1e-3, 1e-2],
    'dropout': [.3, .6],
})

dnnBO.maximize(init_points=12, n_iter=40)

print('DNN: %f' % dnnBO.res['max']['max_val'])
'''
def rfccv(n_estimators, min_samples_split, max_features):
    return cross_val_score(RFC(n_estimators=int(n_estimators),
                               min_samples_split=int(min_samples_split),
                               max_features=min(max_features, 0.999),
                               random_state=2),
                           data, train_age_target, cv=10, n_jobs=-1).mean()
def target(**inargs):
    ordered_values = [inargs[param_name] for param_name in param_names]
    return acc_dict[np.array(ordered_values).tostring()]


init_dict = OrderedDict()
for i, param_name in enumerate(param_names):
    init_dict[param_name] = (min(param_ranges[i]), max(param_ranges[i]))

bo = BayesianOptimization(target, init_dict, verbose=0)

done_params = np.reshape(results[:, :-1], (results.shape[0], nparam))
param_dict = OrderedDict()
for i, param_name in enumerate(param_names):
    param_dict[param_name] = done_params[:, i]
bo.explore(param_dict)

#you can tune the gp parameters and bo parameters
#when acq='ucb', set kappa within [10^-3, 10^-2, ..., 10^3]
#when acq='poi' or 'ei', set xi within [10^-3, 10^-2, ..., 10^3]
gp_params = {'kernel': None, 'alpha': 1e-5}
bo.maximize(init_points=0, n_iter=0, acq='poi', xi=0.01, **gp_params)
utility = bo.util.utility(all_params, bo.gp, 0)

sort_indices = np.argsort(utility)
sort_indices = sort_indices[::-1]

fid = open('output_params.txt', 'w')
icount = 0
for tmp_index in sort_indices:
    tmp_param = all_params[tmp_index]
Example n. 13
    num_folds = 5
    BO = BayesianOptimization(
        get_cls_result, {
            'learning_rate': (0.01, 0.5),
            'num_leaves': (30, 120),
            'colsample_bytree': (0.5, 1),
            'subsample': (0.8, 1),
            'max_depth': (5, 15),
            'reg_alpha': (0, 10),
            'reg_lambda': (0, 10),
            'min_split_gain': (0, 10),
            'min_child_weight': (1, 50),
        })

    BO.explore({
        'learning_rate': [0.01, 0.02, 0.1],
        'num_leaves': [20, 32, 50],
        'colsample_bytree': [0.5, 0.95, 0.99],
        'subsample': [0.8, 0.87, 0.95],
        'max_depth': [5, 8, 15],
        'reg_alpha': [0.04, 0.1, 0.2],
        'reg_lambda': [0.073, 0.2, 0.5],
        'min_split_gain': [0.02224, 0.1, 0.2],
        'min_child_weight': [20, 40, 50],
    })

    BO.maximize(init_points=5, n_iter=30)
    print('-' * 53)
    print('Final Results')
    print('LGB: %f' % BO.res['max']['max_val'])
Example n. 14
#---- SVM ---------------------------
def svccv(C, gamma):
    return cross_val_score(SVC(C=C,
                               gamma=gamma,
                               random_state=None,
                               probability=True),
                           data,
                           target,
                           cv=10).mean()


svcBO = BayesianOptimization(svccv, {
    'C': (0.001, 1000),
    'gamma': (0.0001, 0.1)
})
svcBO.explore({'C': [0.001, 0.01, 0.1, 1.0], 'gamma': [0.001, 0.01, 0.1, 1.0]})
svcBO.maximize(init_points=10, n_iter=20)
print('SVC: %f' % svcBO.res['max']['max_val'])


#---- XGB ----------------------------
def xgbcv(learning_rate, n_estimators):
    return cross_val_score(xgboost.XGBClassifier(
        learning_rate=learning_rate, n_estimators=int(n_estimators)),
                           data,
                           target,
                           cv=10).mean()


xgbBO = BayesianOptimization(xgbcv, {
    'learning_rate': (0.0001, 1.0),
Example n. 15
    if args.resume_bo:
        dramBO = pickle.load(open("dramBO.pkl", "rb"))
    else:
        dramBO = BayesianOptimization(dram, {
            "location_sigma": (1.0, 0.01),
            "lr": (0.1, 1e-8),
            "alpha": (1.0, 1e-10),
            "ratio": (1.0, 0.1),
            "adam_epsilon": (1e-7, 1e-12)
        }, verbose=1)

    dramBO.explore({
        "location_sigma": (0.3, 0.1),
        "lr": (0.003, 0.001),
        "alpha": (1e-7, 9e-8),
        "ratio": (0.3, 0.2),
        "adam_epsilon": (1e-8, 9e-9)
    })

    dramBO.maximize(init_points=3, n_iter=50, acq="ucb", kappa=2.576, xi=0.0)
    dramBO.maximize(init_points=3, n_iter=50, acq="poi", kappa=2.576, xi=0.0)
    dramBO.maximize(init_points=3, n_iter=50, acq="ei", kappa=2.576, xi=0.0)

    dramBO.explore({
        "location_sigma": (0.5, 0.01),
        "lr": (0.03, 3e-5),
        "alpha": (1.0, 1e-8),
        "ratio": (1.0, 0.1),
        "adam_epsilon": (1e-8, 1e-12)
    })
Example n. 16
QWK.explore({
    # 			'bi_rmm':     [0, 0,  0,   1,  0,  1,],
    # 			'rnn_layers': [0, 0,  0,   0,  2,  2,],
    # 			'embd_train': [0, 0,  0,   1,  1,  1,],
    # 			'embd_dim':   [0, 0,  1,   1,  2,  2,],
    # 			'tfidf':      [0, 0,   0,  0,   0, 1,],
    # 			'lr':         [0.001, 0.001],
    # 			'convwin':    [2, 2,  0],
    # 			'convkernel': [0, 32, 0],
    'rnn_dim': [
        0,
        0,
        32,
        32,
        64,
        64,
        128,
        128,
    ],
    'dropout': [
        0.2,
        0.6,
        0.2,
        0.6,
        0.2,
        0.6,
        0.3,
        0.6,
    ],
    'dropout_w': [
        0.2,
        0.4,
        0.4,
        0.5,
        0.4,
        0.4,
        0.4,
        0.5,
    ],
    'dropout_u': [
        0.2,
        0.4,
        0.4,
        0.5,
        0.4,
        0.4,
        0.3,
        0.6,
    ]
})
def kNNOptimize(train_set, test_set, njobs, ijob):

    delta_x = 10. / NBINS_X
    delta_y = 10. / NBINS_Y

    NBINS_TOTAL = NBINS_X * NBINS_Y
    ijob_bins = np.array_split(np.arange(NBINS_TOTAL), njobs)[ijob]

    for i_bin in ijob_bins:

        bin_filename = 'knn_bayes/{0:05d}_{1:02d}_{2:02d}.json'.format(
            i_bin, NBINS_X, NBINS_Y)
        if os.path.isfile(bin_filename):
            continue

        y_lower = int(i_bin / NBINS_X) * delta_y
        x_lower = (i_bin % NBINS_X) * delta_x

        x_upper = x_lower + delta_x
        y_upper = y_lower + delta_y

        # this block is needed because some points fall on the right or
        # top boundary of the domain exactly.
        if x_upper == 10.:
            x_upper += 1.0e-5
        if y_upper == 10.:
            y_upper += 1.0e-5

        initial_points = {"cut_threshold": (5, 7),
                          "w_x": (450, 550),
                          "w_y": (1050, 950),
                          "w_hour": (4, 2),
                          "w_log10acc": (10, 10),
                          "w_weekday": (2, 3),
                          "w_year": (9, 11),
                          "n_neighbors": (20, 25),
                          "margin": (0.02, 0.03)
                          }

        f = functools.partial(validation_map3_kNN,
                              train_set=train_set,
                              xlower=x_lower, xupper=x_upper,
                              ylower=y_lower, yupper=y_upper)
        bo = BayesianOptimization(f=f,
                                  pbounds={"cut_threshold": (3, 12),
                                           "w_x": (250, 1000),
                                           "w_y": (500, 2000),
                                           "w_hour": (1, 10),
                                           "w_log10acc": (5, 30),
                                           "w_weekday": (1, 10),
                                           "w_year": (2, 20),
                                           "n_neighbors": (10, 40),
                                           "margin": (0.01, 0.04)
                                           },
                                  verbose=True)

        # this little bit of code allows seeding of the bayesian optimizer
        # with a few points that you already know are decent parameter values.
        # initial points are based off @Sandro's kNN script.
        #
        # seed the bayesian optimizer with a couple of points.
        bo.explore(initial_points)

        # For some reason that I don't understand, the Bayesian optimizer slows
        # down greatly after 64 iterations. So to be more computationally
        # efficient, limit it to 64.

        # explore the space (xi=0.1)
        # 2 custom (above), 5 initial (implied), 25 exploration  = 32 total
        bo.maximize(n_iter=25, acq="ei", xi=0.1)

        # exploit the peaks for the other 32 iterations (xi=0.)
        bo.maximize(n_iter=32, acq="ei", xi=0.0)

        optimizer_output = bo.res['all']
        optimizer_output['max'] = bo.res['max']

        optimizer_output['i_bin'] = i_bin
        optimizer_output['nx'] = NBINS_X
        optimizer_output['ny'] = NBINS_Y
        optimizer_output['x_lower'] = x_lower
        optimizer_output['y_lower'] = y_lower
        optimizer_output['x_upper'] = x_upper
        optimizer_output['y_upper'] = y_upper

        with open(bin_filename, 'w') as fh:
            fh.write(json.dumps(optimizer_output, sort_keys=True,
                                indent=4, separators=(',', ': ')))
Example n. 18
def optimize():
    # Lasso optimization
    # lasso_BO = BayesianOptimization(lasso_func, {'alpha': (0.000001, .05)})
    # lasso_BO.explore({'alpha': [.00001, .0003, 0.001, 0.01]})
    # lasso_BO.maximize(n_iter=100)
    # print(lasso_BO.res['max'])

    # KRR optimization
    # krr_BO = BayesianOptimization(krr_func, {'alpha': (0,.05), 'degree': (1,5), 'coef0': (0, 10000)})
    # krr_BO.explore({'alpha': [0.001, 0.005, .05], 'degree':[2, 3, 4], 'coef0':[0, .5, 10]})
    # krr_BO.maximize(n_iter=100)
    # print(krr_BO.res['max'])

    # # Elastic optimization
    elastic_BO = BayesianOptimization(elastic_func, {
        'alpha': (0, 10000),
        'l1_ratio': (0, 1)
    })
    elastic_BO.explore({
        'alpha': [0.001, 0.1, 1, 10, 100, 1000, 5000],
        'l1_ratio': [0, .1, .2, .3, .5, .7, .9]
    })
    elastic_BO.maximize(n_iter=1)
    print(elastic_BO.res['max'])

    # Random forest optimization
    # rf_BO = BayesianOptimization(rf_func, {'n_estimators': (1,1000), 'max_depth': (1,500)})
    # rf_BO.explore({'n_estimators': [25, 50, 100, 200, 400], 'max_depth':[10, 40, 80, 320, 500]})
    # rf_BO.maximize(n_iter=100)
    # print(rf_BO.res['max'])

    # svr_BO = BayesianOptimization(svr_func, {'C': (0, 10), 'epsilon':(0,10)})
    # svr_BO.explore({'C': [.001, 0.01, 0.1, 1, 10, 100, 1000], 'epsilon':[.001, .01, .1, 1, 10, 100, 1000]})
    # svr_BO.maximize(n_iter=100)
    # print(svr_BO.res['max'])

    # XGBoost optimization
    xgb_BO = BayesianOptimization(
        xgb_func, {
            'min_child_weight': (1, 4),
            'colsample_bytree': (0.1, 1),
            'max_depth': (2, 9),
            'subsample': (0.3, .8),
            'gamma': (0, 1),
            'alpha': (0, 1),
            'num_rounds': (2000, 7000)
        })

    xgb_BO.maximize(init_points=5, n_iter=300)
    print(xgb_BO.res['max'])

    # Lgb optimization
    lgb_BO = BayesianOptimization(
        lgb_func, {
            'num_leaves': (1, 20),
            'lr': (0.001, .05),
            'num_estimators': (400, 1500),
            'max_bin': (30, 70),
            'bagging_fraction': (0, 1),
            'bagging_freq': (3, 8),
            'feature_fraction': (.6, 1),
            'min_data_in_leaf': (3, 10),
            'min_sum_hessian_in_leaf': (4, 20)
        })
    lgb_BO.explore({
        'num_leaves': [5, 5, 5, 4, 5, 6, 7],
        'lr': [.01, .01, .01, .01, .01, .01, .01],
        'num_estimators': [200, 300, 500, 700, 800, 900, 1000],
        'max_bin': [10, 40, 80, 100, 55, 30, 53],
        'bagging_fraction': [.7, .8, .6, .2, .9, .7, .3],
        'bagging_freq': [4, 5, 2, 7, 2, 4, 5],
        'feature_fraction': [.2, .3, .25, .3, .21, .15, .13],
        'min_data_in_leaf': [5, 6, 5, 4, 4, 7, 7],
        'min_sum_hessian_in_leaf': [8, 2, 5, 10, 11, 15, 17]
    })
    lgb_BO.maximize(init_points=5, n_iter=300)
    print(lgb_BO.res['max'])

    print("=" * 50)
    # print(lasso_BO.res['max'])  # lasso_BO is commented out above
    print(elastic_BO.res['max'])
    # print(rf_BO.res['max'])
    # print(krr_BO.res['max'])
    print(xgb_BO.res['max'])
    print(lgb_BO.res['max'])
Example n. 19
val_pi_labels_onehot = np.load('./val_pi_labels_onehot.out.npy')
val_dna_seqs = pickle.load(open('./val_dna_seqs.out', 'rb'))
val_dna_seqs_onehot = np.transpose(convert_onehot2D(val_dna_seqs),
                                   axes=(0, 2, 1))
global num_classes
num_classes = val_pi_labels_onehot.shape[1]
global dna_bp_length
dna_bp_length = len(val_dna_seqs[0])

# perform bayesian optimization within hyperparameter ranges, with initial guesses
print("Start Bayesian optimization")
gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 2}
bo = BayesianOptimization(
    target, {
        'total_epoch': (5, 5),
        'filter_num': (1, 512),
        'filter_len': (1, 48),
        'num_dense_nodes': (1, 256)
    })
bo.explore({
    'total_epoch': [5, 5, 5],
    'filter_num': [512, 256, 128],
    'filter_len': [48, 24, 12],
    'num_dense_nodes': [256, 128, 64]
})
bo.maximize(init_points=0, n_iter=20, acq="ucb", kappa=5, **gp_params)

# print output values from bayesian optimization
print(bo.res['max'])
print(bo.res['all'])
Example n. 20
def gp_opt_for_policy_search(T,
                             s,
                             y,
                             beta,
                             eta_init,
                             treatment_budget,
                             k,
                             env,
                             infection_probs_predictor,
                             infection_probs_kwargs,
                             transmission_probs_predictor,
                             transmission_probs_kwargs,
                             data_depth,
                             n_rep_per_gp_opt_iteration=10):

    # Objective is mean score over n_rep_per_gp_opt_iteration MC replicates
    def objective(eta1, eta2, eta3):
        eta = np.array([eta1, eta2, eta3])
        scores = []

        for _ in range(n_rep_per_gp_opt_iteration):
            s_tpm = s
            y_tpm = y
            a_dummy = np.zeros(env.L)
            for m in range(T):
                # print(m)
                # Plus perturbation
                priority_score = R(env, s_tpm, a_dummy, y_tpm,
                                   infection_probs_predictor,
                                   infection_probs_kwargs,
                                   transmission_probs_predictor,
                                   transmission_probs_kwargs, data_depth, eta,
                                   beta)
                # env, s, a, y, infection_probs_predictor, infection_probs_kwargs, transmission_prob_predictor,
                # transmission_probs_kwargs, data_depth, eta, bet
                a_tpm = decision_rule(env, s_tpm, a_dummy, y_tpm,
                                      infection_probs_predictor,
                                      infection_probs_kwargs,
                                      transmission_probs_predictor,
                                      transmission_probs_kwargs, eta, beta, k,
                                      treatment_budget, priority_score)
                infection_probs = infection_probs_predictor(
                    a_tpm, y_tpm, beta, env.L, env.adjacency_list,
                    **infection_probs_kwargs)
                y_tpm = np.random.binomial(n=1, p=infection_probs)
                scores.append(-np.mean(y_tpm))
        return np.mean(scores)

    ETA_BOUNDS = (0.0, np.power(1, -1 / 3))
    explore_ = {
        'eta1': [eta_init[0]],
        'eta2': [eta_init[1]],
        'eta3': [eta_init[2]]
    }
    bounds = {'eta1': ETA_BOUNDS, 'eta2': ETA_BOUNDS, 'eta3': ETA_BOUNDS}
    bo = BayesianOptimization(objective, bounds)
    bo.explore(explore_)
    bo.maximize(init_points=10, n_iter=10, alpha=1e-4)
    best_param = bo.res['max']['max_params']
    best_params = [best_param['eta1'], best_param['eta2'], best_param['eta3']]

    return best_params
Example n. 21
def run_hyperparameter_optimization(options, run_exp):
    """
    This function performs hyperparameter optimization using bayesian optimization, random search, or gridsearch.

    It takes an argparse object holding the parameters for configuring an experiments, and a function
    'run_exp' that takes the argparse object, runs an experiments with the respective configuration, and
    returns a score from that configuration.
    It then uses the hyperparameter optimization method to adjust the parameters and run the new configuration.

    Parameters:
    ================

    argparse :
        The argparse object holding the parameters. In particular, it must contain the following two parameters.
        'optimization' : str, Specifies the optimization method. Either 'bayesian', 'random', or 'grid'.
        'optimization_spaces' : str, Specifies the path to a file that denotes the parameters to do search over and
        their possible values (in case of grid search) or possible spaces. See file 'default_optimization_space' for
        details.

    run_exp : function
        A function that takes the argparse object as input and returns a float that is interpreted as the
        score of the configuration (higher is better).

    """

    if options.optimization:

        def optimized_experiment(**parameters):

            current_options = _update_options(options, **parameters)
            result = run_exp(current_options)

            # return the f1 score of the previous experiment
            return result

        if options.optimization == "bayesian":

            gp_params = {"alpha": 1e-5, "kernel" : Matern(nu = 5 / 2)}
            space, init_vals, num_init_vals = _make_space(options)
            bayesian_optimizer = BayesianOptimization(optimized_experiment, space)
            bayesian_optimizer.explore(init_vals)
            bayesian_optimizer.maximize(n_iter=options.optimization_iterations - num_init_vals,
                                        acq = 'ei',
                                        **gp_params)

        elif options.optimization == "random":

            fmin(lambda parameters : optimized_experiment(**parameters),
                        _make_space(options),
                        algo=rand.suggest,
                        max_evals=options.optimization_iterations,
                        rstate = np.random.RandomState(1337))

        elif options.optimization == "grid":
            # perform grid-search by running every possible parameter combination
            combinations = _all_option_combinations(_make_space(options))
            for combi in combinations:
                optimized_experiment(**combi)

    else:
        raise Exception("No hyperparameter method specified!")
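
# A hypothetical usage sketch for run_hyperparameter_optimization. The option
# names ('optimization', 'optimization_spaces', 'optimization_iterations') follow
# the docstring above; the run_exp callable here is a stand-in, not the project's
# real experiment runner, and the call assumes the module's helper functions
# (_make_space, _update_options, ...) are available.
import argparse

def _dummy_run_exp(options):
    # A real run_exp would train a model configured by `options` and return its score.
    return 0.0

_parser = argparse.ArgumentParser()
_parser.add_argument('--optimization', default='bayesian')
_parser.add_argument('--optimization_spaces', default='default_optimization_space')
_parser.add_argument('--optimization_iterations', type=int, default=20)
_options = _parser.parse_args([])

run_hyperparameter_optimization(_options, _dummy_run_exp)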
Example n. 22
        'max_depth': (2, 12),
        'gamma': (0.001, 10.0),
        'min_child_weight': (0, 20),
        'max_delta_step': (0, 10),
        'subsample': (0.4, 1.0),
        'colsample_bytree': (0.4, 1.0)
    })

# This portion of the code is not necessary. You can simply specify that 10-20 random parameter combinations (**init_points** below) be used. However, I like to try a couple of high- and low-end values for each parameter as a starting point, and after that fewer random points are needed. Note that the number of values must be the same for each parameter, and they are applied vertically.

# In[ ]:

XGB_BO.explore({
    'max_depth': [3, 8, 3, 8, 8, 3, 8, 3],
    'gamma': [0.5, 8, 0.2, 9, 0.5, 8, 0.2, 9],
    'min_child_weight': [0.2, 0.2, 0.2, 0.2, 12, 12, 12, 12],
    'max_delta_step': [1, 2, 2, 1, 2, 1, 1, 2],
    'subsample': [0.6, 0.8, 0.6, 0.8, 0.6, 0.8, 0.6, 0.8],
    'colsample_bytree': [0.6, 0.8, 0.6, 0.8, 0.6, 0.8, 0.6, 0.8],
})

# In my version of sklearn there are many warnings thrown out by the GP portion of this code. This is set to prevent them from showing on screen.
#
# If you have a special relationship with your computer and want to know everything it is saying back, you'd probably want to remove the two "warnings" lines and slide the XGB_BO line all the way left.
#
# I am doing only 2 initial points, which along with the 8 exploratory points above makes 10 "random" parameter combinations. I'd say that 15-20 is usually adequate. For n_iter, 25-50 is usually enough.
#
# There are several commented out maximize lines that could be worth exploring. The exact combination of parameters determines **[exploitation vs. exploration](https://github.com/fmfn/BayesianOptimization/blob/master/examples/exploitation%20vs%20exploration.ipynb)**. It is tough to know which would work better without actually trying, though in my hands exploitation with "expected improvement" usually works the best. That's what the XGB_BO.maximize line below is specifying.

# In[ ]:
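
# A minimal sketch (not the original cell, which is omitted here) of the warning
# suppression and the exploitation-leaning maximize call described above, assuming
# the legacy fmfn/BayesianOptimization API used throughout these examples and the
# XGB_BO object created earlier:
import warnings

with warnings.catch_warnings():
    warnings.filterwarnings('ignore')
    # 2 random init points + the 8 explored combinations above = 10 "random" starts,
    # followed by expected-improvement iterations (exploitation, xi=0.0).
    XGB_BO.maximize(init_points=2, n_iter=25, acq='ei', xi=0.0)
    # A more exploratory alternative would be, e.g.:
    # XGB_BO.maximize(init_points=2, n_iter=25, acq='ucb', kappa=10)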

print('-' * 130)
Example n. 23
    val = cross_val_score(SVC(C=C, gamma=gamma, random_state=5),
                          X_train,
                          y_train,
                          scoring='recall_weighted',
                          cv=5).mean()
    return val


gp_params = {"alpha": 1e-5}
clfBO = BayesianOptimization(classifier, {
    'C': (1, 600),
    'gamma': (0.001, 0.01)
})

clfBO.explore({
    'C': [10, 150, 10, 300, 400],
    'gamma': [0.001, 0.01, 0.001, 0.01, 0.01]
})
clfBO.maximize(n_iter=10, **gp_params)
print('-' * 53)

print('#' * 53)
print('Final Results')
print('SVC: %f' % clfBO.res['max']['max_val'])

params = {'kernel': 'rbf', 'gamma': 0.0100, 'C': 574.777}
classifier = SVC(**params)
classifier.fit(X_train, y_train)
y_true, y_pred = y_test, classifier.predict(X_test)
print "\nFull performance report:\n"
print classification_report(y_true, y_pred)
Example n. 24
    plt.legend()
    plt.title('Bayesian Optimization performance vs Iterations')
    plt.xlabel('Number of iterations')
    plt.ylabel('Validation Set Accuracy')
    plt.savefig('bayes_opt.png')
    plt.close(1)

train_data_path = 'income-data/income.train.txt' 
dev_data_path = 'income-data/income.dev.txt'
test_data_path = 'income-data/income.test.txt'
read_data(train_data_path, dev_data_path, test_data_path)
    
# Perform Bayesian Optimization for Bagging
bagging = BayesianOptimization(optimize_bagging, {'max_depth': [1, 100], 'n_estimators': [1, 100]})
    
bagging.explore({'max_depth': [1, 2, 3, 5, 10], 'n_estimators': [1, 2, 5, 10, 20]})

# Run for 50 iterations
bagging.maximize(n_iter = 50)
    
# Perform Bayesian Optimization for Boosting
boosting = BayesianOptimization(optimize_boosting, {'max_depth': [1, 100], 'n_estimators': [1, 100]})
    
boosting.explore({'max_depth': [1, 2, 3, 5, 10], 'n_estimators': [1, 2, 5, 10, 20]})

# Run for 50 iterations
boosting.maximize(n_iter = 50)

# Write output to file
generate_results(bagging, boosting)
Example n. 25
def optimize_postproc_params(arch_to_paths, arches, train_data_path):
    def bo_best(self):
        return {'max_val': self.Y.max(),
                'max_params': dict(zip(self.keys, self.X[self.Y.argmax()]))}

    preload, seeded_objective = _make_scorable_objective(arch_to_paths, arches,
                                                         train_data_path)
    preload()  # read datas into memory

    seeded_bounds = {
        'mask_thresh': (.4, .9),
        'seed_thresh': (.4, .9),
        'min_seed_size': (0, 100),
        'min_size': (0, 100),
        'alpha': (0.0, 1.0),
    }

    seeded_bo = BayesianOptimization(seeded_objective, seeded_bounds)
    cand_params = [
        {'mask_thresh': 0.9000, 'min_seed_size': 100.0000, 'min_size': 100.0000, 'seed_thresh': 0.4000},
        {'mask_thresh': 0.8367, 'seed_thresh': 0.4549, 'min_seed_size': 97, 'min_size': 33},  # 'max_val': 0.8708
        {'mask_thresh': 0.8367, 'min_seed_size': 97.0000, 'min_size': 33.0000, 'seed_thresh': 0.4549},  # max_val': 0.8991
        {'mask_thresh': 0.7664, 'min_seed_size': 48.5327, 'min_size': 61.8757, 'seed_thresh': 0.4090},  # 'max_val': 0.9091}
        {'mask_thresh': 0.6666, 'min_seed_size': 81.5941, 'min_size': 13.2919, 'seed_thresh': 0.4241},  # full dataset 'max_val': 0.9142}
        # {'mask_thresh': 0.8, 'seed_thresh': 0.5, 'min_seed_size': 20, 'min_size': 0},
        # {'mask_thresh': 0.5, 'seed_thresh': 0.8, 'min_seed_size': 20, 'min_size': 0},
        # {'mask_thresh': 0.8338, 'min_seed_size': 25.7651, 'min_size': 38.6179, 'seed_thresh': 0.6573},
        # {'mask_thresh': 0.6225, 'min_seed_size': 93.2705, 'min_size': 5, 'seed_thresh': 0.4401},
        # {'mask_thresh': 0.7870, 'min_seed_size': 85.1641, 'min_size': 64.0634, 'seed_thresh': 0.4320},
    ]
    for p in cand_params:
        p['alpha'] = .88
    n_init = 2 if DEBUG else 40

    seeded_bo.explore(pd.DataFrame(cand_params).to_dict(orient='list'))

    # Basically just using this package for random search.
    # The BO doesn't seem to help much
    seeded_bo.plog.print_header(initialization=True)
    seeded_bo.init(n_init)
    print('seeded ' + ub.repr2(bo_best(seeded_bo), nl=0, precision=4))

    gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 2}

    n_iter = 2 if DEBUG else 10
    for kappa in [10, 5, 1]:
        seeded_bo.maximize(n_iter=n_iter, acq='ucb', kappa=kappa, **gp_params)

    best_res = bo_best(seeded_bo)
    print('seeded ' + ub.repr2(best_res, nl=0, precision=4))

    max_params = best_res['max_params']
    max_value = best_res['max_val']

    # search for a good alpha
    # TODO: improve bayes_opt package to handle this
    for alpha in tqdm.tqdm(np.linspace(0, 1, 50), desc='opt alpha'):
        params = max_params.copy()
        params['alpha'] = alpha
        val = seeded_objective(**params)
        if val > max_value:
            max_value = val
            max_params = params
    return max_value, max_params
print("Size of Test Set: Columns = {}, Rows = {}"). \
    format(X_Final.shape[1], X_Final.shape[0])


##############################################################################
# Bayesian Optimisation - 75 Iterations for Each Algorithm
# Machine Learning Algorithm #1 - Define ranges of Hyperparameters
ml1_bo = BayesianOptimization(cross_validation, {"max_features": (1, 20),
                                                 "criterion": (0, 1),
                                                 "normv": (1, 1),
                                                 "max_depth": (1, 40),
                                                 "n_estimators": (100, 300),
                                                 "log_y": (1, 1)})
ml1_bo.explore({"max_features": [3.0],
                "criterion": [0],
                "normv": [1],
                "max_depth": [15],
                "n_estimators": [50],
                "log_y": [1]})
# Machine Learning Algorithm #2 - Define ranges of Hyperparameters
ml2_bo = BayesianOptimization(cross_validation2, {"n_neighbors": (2, 20),
                                                  "leaf_size": (10, 60),
                                                  "normv": (1, 1),
                                                  "log_y": (1, 1)})
ml2_bo.explore({"n_neighbors": [5],
                "leaf_size": [20],
                "normv": [1],
                "log_y": [1]})
# Optimisation of Machine Learning Algorithm #1 = RandomForestRegressor
ml1_bo.maximize(init_points=75, n_iter=1)
# Optimisation of Machine Learning Algorithm #2 = KNeighborsRegressor
ml2_bo.maximize(init_points=75, n_iter=1)
Example n. 27
def run(gpunum, cancer_type, feature_type, attempt):

    batch_size = 32
    epochs = 10

    os.environ["CUDA_VISIBLE_DEVICES"] = gpunum

    def get_session(gpu_fraction=1):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction, allow_growth=True)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    ktf.set_session(get_session())

    results = []

    def scoreofModel(cancer_type, feature_type, attempt):
        def inner_SoM(pca, dropout, hidden_dims, ae_dim1, ae_dim2):
            print("**scoreofModel pca " + str(pca) + " dropout " +
                  str(dropout) + " hidden dims " + str(hidden_dims) +
                  " dim1 " + str(ae_dim1) + " dim2 " + str(ae_dim2))
            print("ct %s ft %s attempt %d" %
                  (cancer_type, feature_type, attempt))

            hidden_dims = int(hidden_dims)
            ae_dim1 = int(ae_dim1)
            ae_dim2 = int(ae_dim2)

            # AE
            with open('../test_bong/data/overlap_%s.pkl' % (cancer_type),
                      'rb') as handle:
                labels = pickle.load(handle)
                x = pickle.load(handle)
                y = pickle.load(handle)
            x_trn, x_tst, c_trn, c_tst, s_trn, s_tst, l_trn, l_tst = \
                train_test_split(x, y[:, 0], y[:, 1], labels, test_size=80, random_state=7)
            x_trn, x_tst = AE_again_read.AE_model_save(cancer_type,
                                                       feature_type, ae_dim1,
                                                       ae_dim2, x_trn, x_tst)
            clf = PCA(pca, whiten=True)
            x_trn = clf.fit_transform(x_trn)
            x_tst = clf.transform(x_tst)

            x_trn, x_dev, c_trn, c_dev, s_trn, s_dev, l_trn, l_dev = train_test_split(
                x_trn, c_trn, s_trn, l_trn, test_size=20, random_state=7)
            data = tuple((x_trn, c_trn, s_trn, x_dev, c_dev, s_dev, x_tst,
                          c_tst, s_tst))

            def ModelV1(model_input):
                z = Dropout(dropout)(model_input)
                z = Dense(hidden_dims, activation='relu')(z)
                z = Dropout(dropout)(z)
                z = Dense(hidden_dims, activation='relu')(z)
                model_output = Dense(1, activation=None)(z)
                model = Model(model_input, model_output)
                #model.compile(loss=my_cindex(c_tst, s_tst), optimizer='adam')#,metrics=["mse"])
                model.compile(loss="mse", optimizer='adam')
                return model

            feature_dim = x_trn.shape[1]
            input_shape = (feature_dim, )
            model_input = Input(shape=input_shape)
            model = ModelV1(model_input)

            model.summary()
            model_filepath = '../model/%s-%s-%d-%s-%s-%d-%d-%d.model' % (
                cancer_type, feature_type, attempt, str(pca), str(dropout),
                hidden_dims, ae_dim1, ae_dim2)
            checkpoint = MyCallback(results,
                                    model_filepath,
                                    data,
                                    real_save=True,
                                    verbose=1,
                                    save_best_only=True,
                                    mode='auto',
                                    cancer_type=cancer_type,
                                    feature_type=feature_type,
                                    thr=pca,
                                    dropout_prob=dropout,
                                    dimension=hidden_dims,
                                    activate='relu',
                                    AE1=ae_dim1,
                                    AE2=ae_dim2)
            callbacks_list = [checkpoint]

            history = model.fit(x_trn,
                                s_trn,
                                batch_size=batch_size,
                                shuffle=True,
                                callbacks=callbacks_list,
                                epochs=epochs,
                                validation_data=(x_dev, s_dev))

            #print("-----History----")
            #print(history.history.keys())
            #print(history.history)
            #print(len(history.history['val_loss']))

            pred_tst = model.predict(x_tst)

            return my_cindex(c_tst, s_tst)(s_tst, pred_tst)

        return inner_SoM

    def frange(x, y, jump):
        while x < y:
            yield x
            x += jump

    bo_dict = {
        "pca": (0.98, 0.9999),
        "dropout": (0, 0.8),
        "hidden_dims": (10, 1000),
        "ae_dim1": (100, 1500),
        "ae_dim2": (100, 700)
    }

    #for k in bo_dict.keys() :
    #    print(k)
    #    print (bo_dict[k])

    #scoreofModel(**{'ae_dim1': 1138.0196836044008, 'dropout': 0.18242910081095307, 'pca': 0.98912275449631237, 'hidden_dims': 373.61768597111694, 'ae_dim2': 472.20225514485821})

    v1BO = BayesianOptimization(scoreofModel(cancer_type, feature_type,
                                             attempt),
                                bo_dict,
                                verbose=True)

    v1BO.explore({
        "pca": [0.98, 0.1, 0.9999],
        "dropout": [0, 0.2, 0.8],
        "hidden_dims": [10, 200, 1000],
        "ae_dim1": [100, 300, 1500],
        "ae_dim2": [100, 100, 700],
    })

    gp_params = {"alpha": 1e-5}

    v1BO.maximize(init_points=2, n_iter=30, acq='ucb', kappa=5)

    print('Final Results')
    #print('max %f' % v1BO.res['max']['max_val'])
    #print('***<max>****')
    #print(v1BO.res['max'])
    #print('***<all>***')
    #print(v1BO.res['all'])
    results.append(v1BO.res['all'])
    #print(results)
    print(v1BO.res)

    with open('./BO_Result_' + cancer_type + '.txt', 'at') as f:

        params = v1BO.res['all']['params']
        values = v1BO.res['all']['values']
        keys = params[0].keys()

        for i in range(2):
            line = [cancer_type, feature_type]

            for k in keys:
                line.append(str(params[i][k]))
            line.append(str(values[i]))
            f.write('\t'.join(line) + '\n')
Example n. 28
    x1 = np.array(xtrain)[idx1, :][0]
    y0 = np.array(ytrain)[idx0]
    y1 = np.array(ytrain)[idx1]

    nb_classes = 2
    dims = xtrain.shape[1]
    print(dims, 'dims')

    kerasBO = BayesianOptimization(
        kerascv, {
            'dense1': (int(0.15 * xtrain.shape[1]), int(2 * xtrain.shape[1])),
            'dropout1': (0.05, 0.5),
            'dense2': (int(0.15 * xtrain.shape[1]), int(2 * xtrain.shape[1])),
            'dropout2': (0.05, 0.5),
            'epochs': (int(20), int(150))
        })
    kerasBO.explore({
        'dense1': [int(0.15 * xtrain.shape[1])],
        'dropout1': [0.05],
        'dense2': [int(1.5 * xtrain.shape[1])],
        'dropout2': [0.5],
        'epochs': [40]
    })

    kerasBO.maximize(init_points=3, n_iter=25)
    print('-' * 53)

    print('Final Results')
    print('Extra Trees: %f' % kerasBO.res['max']['max_val'])
    print(kerasBO.res['max']['max_params'])
Example n. 29
from bayes_opt import BayesianOptimization
# Example of how to use this bayesian optimization package.

# Lets find the maximum of a simple quadratic function of two variables
# We create the bayes_opt object and pass the function to be maximized
# together with the parameters names and their bounds.
bo = BayesianOptimization(lambda x, y: -x**2 - (y - 1)**2 + 1, {
    'x': (-4, 4),
    'y': (-3, 3)
})

# One of the things we can do with this object is pass points
# which we want the algorithm to probe. A dictionary with the
# parameters names and a list of values to include in the search
# must be given.
bo.explore({'x': [-1, 3], 'y': [-2, 2]})

# Additionally, if we have any prior knowledge of the behaviour of
# the target function (even if not totally accurate) we can also
# tell that to the optimizer.
# Here we pass a dictionary with target values as keys of another
# dictionary with parameters names and their corresponding value.
bo.initialize({-2: {'x': 1, 'y': 0}, -1.251: {'x': 1, 'y': 1.5}})

# Once we are satisfied with the initialization conditions
# we let the algorithm do its magic by calling the maximize()
# method.
bo.maximize(init_points=5, n_iter=15, kappa=3.29)

# The output values can be accessed with self.res
print(bo.res['max'])
def target(**inargs):
    ordered_values = [inargs[param_name] for param_name in param_names]
    acc =  acc_dict[np.array(ordered_values).tostring()]
    return acc
    
init_dict = OrderedDict()
for i, param_name in enumerate(param_names):
    init_dict[param_name] = (min(param_ranges[i]), max(param_ranges[i]))
     
bo = BayesianOptimization(target,   init_dict, verbose=0)

done_params = np.reshape(results[:,:-1], (results.shape[0], nparam))
param_dict = OrderedDict()
for i, param_name in enumerate(param_names):
    param_dict[param_name] = done_params[:, i]    
bo.explore(param_dict)

##################################################
# main loop
for iter in range(max_iter):
    #you can tune the gp parameters and bo parameters
    #when acq='ucb', set kappa within [10^-3, 10^-2, ..., 10^3]
    #when acq='poi' or 'ei', set xi within [10^-3, 10^-2, ..., 10^3]
    gp_params = {'kernel': None, 'alpha': 1e-5}
    bo.maximize(init_points=0, n_iter=0, acq='poi', xi=0.01,  **gp_params)
    utility = bo.util.utility(all_params, bo.gp, 0)
    sort_indices = np.argsort(utility)
    sort_indices = sort_indices[:: -1]
    
    for tmp_index in sort_indices:
        next_params = all_params[tmp_index]
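        # Hedged sketch of how such a loop typically continues: candidates are
        # walked from most to least promising, ones already in done_params are
        # skipped, and the first new one becomes this iteration's suggestion.
        # How that point is then evaluated and fed back (e.g. via initialize())
        # depends on how acc_dict is populated, which is outside this snippet.
        if any(np.array_equal(next_params, p) for p in done_params):
            continue
        print('iteration %d suggests:' % iter,
              dict(zip(param_names, next_params)))
        break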
Example No. 31
0
                                   n_informative=12,
                                   n_redundant=7)

def svccv(C, gamma):
    return cross_val_score(SVC(C=C, gamma=gamma, random_state=2),
                           data, target, 'roc_auc', cv=5).mean()

def rfccv(n_estimators, min_samples_split, max_features):
    return cross_val_score(RFC(n_estimators=int(n_estimators),
                               min_samples_split=int(min_samples_split),
                               max_features=min(max_features, 0.999),
                               random_state=2),
                           data, target, 'roc_auc', cv=5).mean()

if __name__ == "__main__":

    svcBO = BayesianOptimization(svccv, {'C': (0.001, 100), 'gamma': (0.0001, 0.1)})
    svcBO.explore({'C': [0.001, 0.01, 0.1], 'gamma': [0.001, 0.01, 0.1]})

    rfcBO = BayesianOptimization(rfccv, {'n_estimators': (10, 250),
                                         'min_samples_split': (2, 25),
                                         'max_features': (0.1, 0.999)})
    svcBO.maximize(acq='ei')  # acquisition must be 'ucb', 'ei', or 'poi'; 'ei' assumed here

    print('-'*53)
    # rfcBO.maximize()
    # print('-'*53)
    # print('Final Results')
    # print('SVC: %f' % svcBO.res['max']['max_val'])
    # print('RFC: %f' % rfcBO.res['max']['max_val'])
Example No. 32
0
def hypersearch_probs():
    prob_paths  = paths['probs']
    prob1_paths = paths['probs1']

    # https://github.com/fmfn/BayesianOptimization
    # https://github.com/fmfn/BayesianOptimization/blob/master/examples/usage.py
    # https://github.com/fmfn/BayesianOptimization/blob/master/examples/exploitation%20vs%20exploration.ipynb
    # subx = [0, 1, 2, 3, 4, 5]
    subx = [2, 4, 5, 9, 10, 14, 17, 18, 20, 30, 33, 39, 61, 71, 72, 73, 75, 81, 84]
    from bayes_opt import BayesianOptimization

    def best(self):
        return {'max_val': self.Y.max(),
                'max_params': dict(zip(self.keys,
                                       self.X[self.Y.argmax()]))}

    def seeded_objective(**params):
        seed_thresh, mask_thresh, min_seed_size, min_size = ub.take(
            params, 'seed_thresh, mask_thresh, min_seed_size, min_size'.split(', '))
        fscores = []
        for path, path1 in zip(ub.take(prob_paths, subx), ub.take(prob1_paths, subx)):
            gti, uncertain, dsm, bgr = gt_info_from_path(path)

            probs = np.load(path)['arr_0']
            seed_probs = probs[:, :, task.classname_to_id['inner_building']]
            seed = (seed_probs > seed_thresh).astype(np.uint8)

            probs1 = np.load(path1)['arr_0']
            mask_probs = probs1[:, :, 1]
            mask = (mask_probs > mask_thresh).astype(np.uint8)

            pred = seeded_instance_label(seed, mask,
                                         min_seed_size=min_seed_size,
                                         min_size=min_size)
            scores = instance_fscore(gti, uncertain, dsm, pred)
            fscore = scores[0]
            fscores.append(fscore)
        mean_fscore = np.mean(fscores)
        return mean_fscore

    seeded_bounds = {
        'mask_thresh': (.4, .9),
        'seed_thresh': (.4, .9),
        'min_seed_size': (0, 100),
        'min_size': (0, 100),
    }
    n_init = 50
    seeded_bo = BayesianOptimization(seeded_objective, seeded_bounds)
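    # explore() expects a dict of parallel lists (one list per parameter); the
    # DataFrame round-trip below is simply a convenient way to build that from
    # a list of per-point dicts.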
    seeded_bo.explore(pd.DataFrame([
        {'mask_thresh': 0.9000, 'min_seed_size': 100.0000, 'min_size': 100.0000, 'seed_thresh': 0.4000},
        {'mask_thresh': 0.8, 'seed_thresh': 0.5, 'min_seed_size': 20, 'min_size': 0},
        {'mask_thresh': 0.5, 'seed_thresh': 0.8, 'min_seed_size': 20, 'min_size': 0},
        {'mask_thresh': 0.8338, 'min_seed_size': 25.7651, 'min_size': 38.6179, 'seed_thresh': 0.6573},
        {'mask_thresh': 0.6225, 'min_seed_size': 93.2705, 'min_size': 5, 'seed_thresh': 0.4401},
        {'mask_thresh': 0.7870, 'min_seed_size': 85.1641, 'min_size': 64.0634, 'seed_thresh': 0.4320},
        {'mask_thresh': 0.8367, 'seed_thresh': 0.4549, 'min_seed_size': 97, 'min_size': 33},  # 'max_val': 0.8708
        {'mask_thresh': 0.7664, 'min_seed_size': 48.5327, 'min_size': 61.8757, 'seed_thresh': 0.4090},  # 'max_val': 0.9091}
        {'mask_thresh': 0.8367, 'min_seed_size': 97.0000, 'min_size': 33.0000, 'seed_thresh': 0.4549},  # max_val': 0.8991
    ]).to_dict(orient='list'))
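    # plog and init() are internals of the older bayes_opt release these
    # snippets target: print the log header, then evaluate n_init random
    # points (plus the explored ones above) before the guided maximize()
    # calls further down.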
    seeded_bo.plog.print_header(initialization=True)
    seeded_bo.init(n_init)
    print(ub.repr2(best(seeded_bo), nl=0, precision=4))
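    # inner_bo and outer_bo are not constructed in this excerpt; presumably
    # they are set up the same way as seeded_bo for the other label variants.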

    print('seeded ' + ub.repr2(best(seeded_bo), nl=0, precision=4))
    print('inner ' + ub.repr2(best(inner_bo), nl=0, precision=4))
    print('outer ' + ub.repr2(best(outer_bo), nl=0, precision=4))

    # {'max_params': {'thresh': 0.8000, 'min_size': 0.0000}, 'max_val': 0.6445}
    gp_params = {"alpha": 1e-5, "n_restarts_optimizer": 2}

    n_iter = n_init // 2
    for kappa in [10, 5, 1]:
        seeded_bo.maximize(n_iter=n_iter, acq='ucb', kappa=kappa, **gp_params)
        inner_bo.maximize(n_iter=n_iter, acq='ucb', kappa=kappa, **gp_params)
        outer_bo.maximize(n_iter=n_iter, acq='ucb', kappa=kappa, **gp_params)

    print('seeded ' + ub.repr2(best(seeded_bo), nl=0, precision=4))
    print('inner ' + ub.repr2(best(inner_bo), nl=0, precision=4))
    print('outer ' + ub.repr2(best(outer_bo), nl=0, precision=4))
    print(arch)
Example No. 33
0
from bayes_opt import BayesianOptimization
'''
Example of how to use this bayesian optimization package.
'''

# Lets find the maximum of a simple quadratic function of two variables
# We create the bayes_opt object and pass the function to be maximized
# together with the parameters names and their bounds.
bo = BayesianOptimization(lambda x, y: -x**2 - (y - 1)**2 + 1,
                          {'x': (-4, 4), 'y': (-3, 3)})

# One of the things we can do with this object is pass points
# which we want the algorithm to probe. A dictionary with the
# parameters names and a list of values to include in the search
# must be given.
bo.explore({'x': [-1, 3], 'y': [-2, 2]})

# Additionally, if we have any prior knowledge of the behaviour of
# the target function (even if not totally accurate) we can also
# tell that to the optimizer.
# Here we pass a dictionary with target values as keys of another
# dictionary with parameters names and their corresponding value.
bo.initialize({-2: {'x': 1, 'y': 0}, -1.251: {'x': 1, 'y': 1.5}})

# Once we are satisfied with the initialization conditions
# we let the algorithm do its magic by calling the maximize()
# method.
bo.maximize(init_points=15, n_iter=25)

# The output values can be accessed with self.res
print(bo.res['max'])
Example No. 34
0
def bayesopt_under_true_model(seed, info, quantile, mc_reps=1000, T=50):
    np.random.seed(seed)
    env = Bandit.NormalMAB(list_of_reward_mus=[0.3, 0.6],
                           list_of_reward_vars=[0.1**2, 0.1**2])
    pre_simulated_data = env.generate_mc_samples(mc_reps, T)
    rollout_function_kwargs = {'pre_simulated_data': pre_simulated_data}

    rollout_function = mab_rollout_with_fixed_simulations
    policy = mab_epsilon_greedy_policy

    if info:
        bounds = {
            'zeta0': (0.05, 2.0),
            'zeta1': (-5.0, 5.0),
            'zeta2': (-5.0, 5.0),
            'zeta3': (-5.0, 5.0),
            'zeta4': (-5.0, 5.0),
            'zeta5': (-5.0, 5.0),
            'zeta6': (-5.0, 5.0),
            'zeta7': (-5.0, 5.0),
            'zeta8': (-5.0, 5.0)
        }
        explore_ = {
            'zeta0': [0.05, 0.1, 0.0, 1.0, 0.1],
            'zeta1': [0.0, 0.0, 0.0, 0.0, -122.5],
            'zeta2': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta3': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta4': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta5': [0.0, 0.0, 0.0, 0.0, 2.5],
            'zeta6': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta7': [0.0, 0.0, 0.0, 0.0, 0.0],
            'zeta8': [0.0, 0.0, 0.0, 0.0, 0.0]
        }
        tuning_function = tuned_bandit.information_expit_epsilon_decay

        def objective(zeta0, zeta1, zeta2, zeta3, zeta4, zeta5, zeta6, zeta7,
                      zeta8):
            zeta = np.array([
                zeta0, zeta1, zeta2, zeta3, zeta4, zeta5, zeta6, zeta7, zeta8
            ])
            return rollout_function(zeta, policy, T, tuning_function, env,
                                    info, quantile, **rollout_function_kwargs)
    else:
        bounds = {
            'zeta0': (0.05, 2.0),
            'zeta1': (1.0, 49.0),
            'zeta2': (0.01, 2.5)
        }
        explore_ = {
            'zeta0': [1.0, 0.05, 1.0, 0.1],
            'zeta1': [50.0, 49.0, 1.0, 49.0],
            'zeta2': [0.1, 2.5, 1.0, 2.5]
        }
        tuning_function = tuned_bandit.expit_epsilon_decay

        def objective(zeta0, zeta1, zeta2):
            zeta = np.array([zeta0, zeta1, zeta2])
            return rollout_function(zeta, policy, T, tuning_function, env,
                                    info, quantile, **rollout_function_kwargs)

    bo = BayesianOptimization(objective, bounds)
    bo.explore(explore_)
    bo.maximize(init_points=50, n_iter=50, alpha=1e-4)
    #  bo.maximize(init_points=10, n_iter=15, alpha=1e-4)
    best_param = bo.res['max']['max_params']
    best_param = np.array(
        [best_param['zeta{}'.format(i)] for i in range(len(bounds))])
    print(best_param)
    return best_param
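# Hedged usage sketch for the function above; the argument values here are
# illustrative, not taken from the original experiment.
if __name__ == '__main__':
    tuned_zeta = bayesopt_under_true_model(seed=0, info=False, quantile=0.5,
                                           mc_reps=1000, T=50)
    print(tuned_zeta)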
Example No. 35
0
    idx0 = np.where(fold_index != 1)
    idx1 = np.where(fold_index == 1)
    x0 = np.array(xtrain)[idx0,:][0]
    x1 = np.array(xtrain)[idx1,:][0]
    y0 = np.array(ytrain)[idx0]
    y1 = np.array(ytrain)[idx1]

    nb_classes = 2
    dims = xtrain.shape[1]
    print(dims, 'dims')

    kerasBO = BayesianOptimization(kerascv,
                                   {'dense1': (int(0.15 * xtrain.shape[1]), int(2 * xtrain.shape[1])),
                                    'dropout1': (0.05, 0.5),
                                    'dense2': (int(0.15 * xtrain.shape[1]), int(2 * xtrain.shape[1])),
                                    'dropout2': (0.05, 0.5),
                                    'epochs': (int(20), int(150))
                                    })
    kerasBO.explore({'dense1': [int(0.15 * xtrain.shape[1])],
                     'dropout1': [0.05],
                     'dense2': [int(1.5 * xtrain.shape[1])],
                     'dropout2': [0.5],
                     'epochs': [40]})

    kerasBO.maximize(init_points=3, n_iter=25)
    print('-' * 53)

    print('Final Results')
    print('Extra Trees: %f' % kerasBO.res['max']['max_val'])
    print(kerasBO.res['max']['max_params'])
Example No. 36
0
        val = 0.
    return val


def rfccv(n_estimators, min_samples_split, max_features):
    val = cross_val_score(RFC(n_estimators=int(n_estimators),
                              min_samples_split=int(min_samples_split),
                              max_features=min(max_features, 0.999),
                              random_state=2),
                          data,
                          target,
                          'f1',
                          cv=2).mean()
    return val


if __name__ == "__main__":
    gp_params = {"alpha": 1e-5}
    SA = SA()
    svcBO = BayesianOptimization(svccv, {'C': (0., 1.)})
    svcBO.explore({'C': SA})
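    # Note: explore() normally takes a dict mapping each parameter name to a
    # list of candidate values (as in the commented-out call below); passing
    # the SA instance here presumably relies on it behaving like such a list
    # of C values.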
    #svcBO.explore({'C':[0.1,0.2,0.5,0.9]})

    svcBO.maximize(n_iter=5, **gp_params)
    print('-' * 53)

    print('-' * 53)
    print('Final Results')
    print('SVC: %f' % svcBO.res['max']['max_val'])
    print('SVC: %s' % list(svcBO.res['max']['max_params'].values())[0])