def test_checks_optimizer():
    """Passing a non-optimizer value to .search must raise ValueError."""
    with pytest.raises(ValueError):
        hyper = Hyperactive(X, y)
        hyper.search(search_config, optimizer=1)
def test_verbosity3():
    """Smoke test: verbosity level 1 runs a search without errors."""
    hyper = Hyperactive(X, y, verbosity=1, memory=memory)
    hyper.search(search_config)
def test_warm_start_multiple():
    """Short-term memory combined with two parallel jobs."""
    optimizer = Hyperactive(X, y, memory="short")
    optimizer.search(search_config, n_iter=10, n_jobs=2)
data = load_breast_cancer()
X, y = data.data, data.target


def model(para, X, y):
    """Mean 3-fold CV accuracy of a soft-voting ensemble (GBC + MLP + SVC)."""
    booster = GradientBoostingClassifier(
        n_estimators=para["n_estimators"], max_depth=para["max_depth"]
    )
    net = MLPClassifier(hidden_layer_sizes=para["hidden_layer_sizes"])
    svm = SVC(gamma="auto", probability=True)
    ensemble = EnsembleVoteClassifier(
        clfs=[booster, net, svm], weights=[2, 1, 1], voting="soft"
    )
    return cross_val_score(ensemble, X, y, cv=3).mean()


search_config = {
    model: {
        "n_estimators": range(10, 100, 10),
        "max_depth": range(2, 12),
        "hidden_layer_sizes": (range(10, 100, 10),),
    }
}

opt = Hyperactive(search_config, n_iter=30)
opt.search(X, y)
def test_max_time():
    """A tiny max_time budget should still complete without raising."""
    hyper = Hyperactive(X, y, memory=memory)
    hyper.search(search_config, max_time=0.00001)
# NOTE(review): fragment — the EnsembleOptimizer(...) call is cut off at the
# end of this chunk; its closing parenthesis lives outside the visible source.
X, y = data.data, data.target


def model(opt):
    # 5-fold CV accuracy of a KNN classifier; opt["..."] supplies the
    # hyperparameter values for the current iteration.
    knr = KNeighborsClassifier(n_neighbors=opt["n_neighbors"])
    scores = cross_val_score(knr, X, y, cv=5)
    score = scores.mean()
    return score


search_space = {
    "n_neighbors": list(range(1, 100)),
}

hyper = Hyperactive()
hyper.add_search(model, search_space, n_iter=100)
hyper.run()

# Re-use the finished search as a warm start for an SMBO-style optimizer:
# parameter values and their scores are pulled from the results frame.
results = hyper.results(model)
values = results[list(search_space.keys())].values
scores = results["score"].values

warm_start_smbo = (values, scores)

optimizer = EnsembleOptimizer(
    estimators=[SVR(), DecisionTreeRegressor(), MLPRegressor()],
    xi=0.02,
    warm_start_smbo=warm_start_smbo,
    rand_rest_p=0.05,
def test_n_jobs_4():
    """Search distributed over four parallel jobs."""
    hyper = Hyperactive(X, y, memory=memory)
    hyper.search(search_config, n_jobs=4)
def test_joblib_0():
    """The joblib distribution backend runs a two-worker search."""
    opt = Hyperactive(distribution="joblib")
    opt.add_search(objective_function, search_space, n_iter=15, n_jobs=2)
    opt.run()
optimizer.step() # Validation of the model. model.eval() correct = 0 with torch.no_grad(): for batch_idx, (data, target) in enumerate(valid_loader): # Limiting validation data. if batch_idx * BATCHSIZE >= N_VALID_EXAMPLES: break data, target = data.view(data.size(0), -1).to(DEVICE), target.to(DEVICE) output = model(data) # Get the index of the max log-probability. pred = output.argmax(dim=1, keepdim=True) correct += pred.eq(target.view_as(pred)).sum().item() accuracy = correct / min(len(valid_loader.dataset), N_VALID_EXAMPLES) return accuracy search_space = { "linear.0": list(range(10, 200, 10)), "linear.1": list(range(10, 200, 10)), } hyper = Hyperactive() hyper.add_search(pytorch_cnn, search_space, n_iter=5) hyper.run()
def test_n_jobs_3():
    """n_jobs=-1 means: use all available cores."""
    opt = Hyperactive()
    opt.add_search(objective_function, search_space, n_iter=15, n_jobs=-1)
    opt.run()
def test_multiprocessing_0():
    """The multiprocessing distribution backend runs a two-worker search."""
    opt = Hyperactive(distribution="multiprocessing")
    opt.add_search(objective_function, search_space, n_iter=15, n_jobs=2)
    opt.run()
"ParticleSwarm", "EvolutionStrategy", # "Bayesian", ] losses = [] for optimizer in tqdm.tqdm(optimizer_list): loss_opt = [] for search_config in search_config_list: loss_avg = [] for i in range(10): opt = Hyperactive(X, y, memory="short", random_state=i, verbosity=0) opt.search(search_config, n_iter=100, optimizer=optimizer) model = list(search_config.keys())[0] loss = opt.best_scores[model] loss_avg.append(loss) loss_avg = np.array(loss_avg).mean() loss_opt.append(loss_avg) loss_opt = np.array(loss_opt).sum() losses.append(loss_opt)
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from sklearn.datasets import load_breast_cancer

from hyperactive import Hyperactive

data = load_breast_cancer()
X, y = data.data, data.target


def model(opt):
    """Mean 3-fold CV accuracy of an XGBoost classifier for one config."""
    classifier = XGBClassifier(
        n_estimators=opt["n_estimators"],
        max_depth=opt["max_depth"],
        learning_rate=opt["learning_rate"],
    )
    cv_scores = cross_val_score(classifier, X, y, cv=3)
    return cv_scores.mean()


search_space = {
    "n_estimators": list(range(10, 200, 10)),
    "max_depth": list(range(2, 12)),
    "learning_rate": [1e-3, 1e-2, 1e-1, 0.5, 1.0],
}

hyper = Hyperactive()
hyper.add_search(model, search_space, n_iter=30)
hyper.run()
def test_checks_n_jobs():
    """A fractional n_jobs is invalid and must raise ValueError."""
    with pytest.raises(ValueError):
        hyper = Hyperactive(X, y)
        hyper.search(search_config, n_jobs=0.1)
def cnn(para, X_train, y_train):
    """Train a small dense Keras net and return its test-set accuracy.

    NOTE(review): reads module-level X_test/y_test — defined elsewhere.
    """
    net = Sequential()
    net.add(Flatten())
    net.add(Dense(para["Dense.0"]))
    net.add(Activation("relu"))
    net.add(Dropout(para["Dropout.0"]))
    net.add(Dense(10))
    net.add(Activation("softmax"))

    net.compile(
        optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
    )
    net.fit(X_train, y_train, epochs=25, batch_size=128)

    _, score = net.evaluate(x=X_test, y=y_test)
    return score


search_config = {
    cnn: {
        "Dense.0": range(100, 1000, 100),
        "Dropout.0": np.arange(0.1, 0.9, 0.1),
    }
}

opt = Hyperactive(search_config, n_iter=5)
opt.search(X_train, y_train)
def test_memory_warm_start_manual():
    """A hand-built memory_warm_start frame should skip known evaluations."""
    data = load_breast_cancer()
    X, y = data.data, data.target

    def objective_function(optimizer):
        dtc = GradientBoostingClassifier(
            n_estimators=optimizer.suggested_params["n_estimators"],
        )
        return cross_val_score(dtc, X, y, cv=5).mean()

    search_space = {
        "n_estimators": np.arange(500, 502),
    }

    # Baseline: time a single real evaluation.
    start_ref = time.time()
    hyper = Hyperactive()
    hyper.add_search(objective_function, search_space, n_iter=1)
    hyper.run()
    duration_ref = time.time() - start_ref

    # Pre-computed scores covering the entire (tiny) search space.
    memory_warm_start = pd.DataFrame(
        [[500, 0.9], [501, 0.91]], columns=["n_estimators", "score"]
    )

    start_warm = time.time()
    hyper0 = Hyperactive()
    hyper0.add_search(
        objective_function,
        search_space,
        n_iter=10,
        memory_warm_start=memory_warm_start,
    )
    hyper0.run()
    duration_warm = time.time() - start_warm

    # Ten warmed iterations must be much cheaper than one cold evaluation.
    assert duration_ref > duration_warm * 0.3
def test_ltm_1(objective_function, search_space, path):
    """Long-term memory round-trip: two searches sharing one memory store."""
    (search_space, compare) = search_space

    print("\n objective_function \n", objective_function)
    print("\n search_space \n", search_space)
    print("\n compare \n", compare)
    print("\n path \n", path)

    memory = LongTermMemory(str(objective_function.__name__), path=path)

    def _run_search():
        # One 10-iteration search wired to the shared long-term memory.
        hyper = Hyperactive()
        hyper.add_search(
            objective_function,
            search_space,
            n_iter=10,
            initialize={"random": 1},
            long_term_memory=memory,
        )
        hyper.run()
        return hyper.results(objective_function)

    results1 = _run_search()
    results2 = _run_search()

    memory.remove_model_data()

    print("\n results1 \n", results1)
    print("\n results2 \n", results2)
def test_memory_timeSave_0():
    """Disabling memory forces re-evaluation and must be noticeably slower."""
    data = load_breast_cancer()
    X, y = data.data, data.target

    def objective_function(optimizer):
        dtc = DecisionTreeClassifier(
            min_samples_split=optimizer.suggested_params["min_samples_split"]
        )
        return cross_val_score(dtc, X, y, cv=5).mean()

    search_space = {
        "min_samples_split": np.arange(2, 20),
    }

    start_memory = time.time()
    hyper = Hyperactive()
    hyper.add_search(objective_function, search_space, n_iter=100)
    hyper.run()
    duration_memory = time.time() - start_memory

    start_no_memory = time.time()
    hyper = Hyperactive()
    hyper.add_search(objective_function, search_space, n_iter=100, memory=False)
    hyper.run()
    duration_no_memory = time.time() - start_no_memory

    # With memory, repeated positions are looked up instead of re-scored.
    assert duration_memory < duration_no_memory * 0.8
# NOTE(review): fragment — the dict literal closed by this brace (and the
# model/search-space definitions it belongs to) starts outside this chunk.
}

# create an instance of the ProgressBoard
progress_board1 = ProgressBoard()
"""
Maybe you do not want to have the information of both searches on the same
browser tab? If you want to open multiple progress board tabs at the same
time you can just create as many instances of the ProgressBoard-class as you
want and pass it to the corresponding searches.
"""
# progress_board2 = ProgressBoard()
"""
uncomment the line above and pass progress_board2 to one .add_search(...)
to open two browser tabs at the same time
"""

hyper = Hyperactive()
hyper.add_search(
    model_gbr,
    search_space_gbr,
    n_iter=200,
    n_jobs=2,  # the progress board works seamlessly with multiprocessing
    progress_board=progress_board1,  # pass the instance of the ProgressBoard to .add_search(...)
)
# if you add more searches to Hyperactive they will appear in the same progress board
hyper.add_search(
    model_rfr,
    search_space_rfr,
    n_iter=200,
    n_jobs=4,
    progress_board=progress_board1,
def test_memory_warm_start():
    """Warm-starting from previous results must cut the search time."""
    data = load_breast_cancer()
    X, y = data.data, data.target

    def objective_function(optimizer):
        dtc = DecisionTreeClassifier(
            max_depth=optimizer.suggested_params["max_depth"],
            min_samples_split=optimizer.suggested_params["min_samples_split"],
        )
        return cross_val_score(dtc, X, y, cv=5).mean()

    search_space = {
        "max_depth": np.arange(1, 10),
        "min_samples_split": np.arange(2, 20),
    }

    # Cold search: every position is evaluated from scratch.
    start_cold = time.time()
    hyper0 = Hyperactive()
    hyper0.add_search(objective_function, search_space, n_iter=300)
    hyper0.run()
    duration_cold = time.time() - start_cold

    # Warm search: seeded with the cold search's results frame.
    start_warm = time.time()
    results0 = hyper0.results(objective_function)
    hyper1 = Hyperactive()
    hyper1.add_search(
        objective_function,
        search_space,
        n_iter=300,
        memory_warm_start=results0,
    )
    hyper1.run()
    duration_warm = time.time() - start_warm

    assert duration_warm < duration_cold * 0.5
def test_positional_args():
    """X and y are accepted positionally for several random_state values.

    NOTE(review): random_state=1 appears twice in the original — possibly
    intended as 0 then 1 (cf. test_random_state); preserved as-is.
    """
    for seed in (False, 1, 1):
        hyper = Hyperactive(X, y, random_state=seed, memory=memory)
        hyper.search(search_config)
def collect_data(runs, X, y, sklearn_model, opt_list, search_config, n_iter):
    """Benchmark per-iteration runtime of several optimizers against a
    no-optimization baseline and dump the results as a CSV file.

    NOTE(review): relies on module-level `cv`, `tqdm`, `np`, `pd`, and
    `Hyperactive` — confirm these exist in the full file. Indentation of the
    loop bodies is reconstructed from the collapsed source.
    """
    time_c = time.time()

    data_runs = []

    for run in range(runs):
        print("\nRun nr.", run, "\n")
        time_opt = []

        # Baseline: n_iter plain cross-validations without any optimizer.
        start = time.perf_counter()
        for i in tqdm.tqdm(range(n_iter)):
            scores = cross_val_score(
                sklearn_model, X, y, scoring="accuracy", n_jobs=1, cv=cv
            )
        time_ = time.perf_counter() - start
        time_opt.append(time_)
        # data["No Opt"]["0"] = time_

        for opt_str in opt_list:
            print("optimizer:", opt_str, type(opt_str))

            # Population-based optimizers evaluate several positions per
            # iteration, so their iteration budget is scaled down.
            n_iter_temp = n_iter
            if opt_str == "ParallelTempering":
                n_iter_temp = int(n_iter / 4)

            if opt_str == "ParticleSwarm":
                n_iter_temp = int(n_iter / 10)

            if opt_str == "EvolutionStrategy":
                n_iter_temp = int(n_iter / 10)

            opt = Hyperactive(X, y, memory=False)
            start = time.perf_counter()
            opt.search(search_config, n_iter=n_iter_temp, optimizer=opt_str)
            time_ = time.perf_counter() - start
            time_opt.append(time_)

        # Normalize to time per iteration for this run.
        time_opt = np.array(time_opt)
        time_opt = time_opt / n_iter
        # time_opt = np.expand_dims(time_opt_norm, axis=0)
        data_runs.append(time_opt)

    data_runs = np.array(data_runs)
    print("\nCreate Dataframe\n")
    print("data_runs", data_runs, data_runs.shape)

    # One column per timing source: baseline first, then each optimizer.
    column_names = ["No Opt."] + opt_list
    data = pd.DataFrame(data_runs, columns=column_names)

    model_name = list(search_config.keys())[0]

    # File name keyed to the sklearn model class being benchmarked.
    calc_optimizer_time_name = (
        "optimizer_calc_time_" + str(sklearn_model.__class__.__name__) + ".csv"
    )
    file_name = str(calc_optimizer_time_name)
    data.to_csv(file_name, index=False)

    print("data collecting time:", time.time() - time_c)
def test_random_state():
    """Searches run for random_state disabled, zero, and a positive seed."""
    for seed in (False, 0, 1):
        hyper = Hyperactive(X, y, random_state=seed, memory=memory)
        hyper.search(search_config)
# NOTE(review): fragment — the class owning this fit() tail (presumably a
# GPR0 wrapper around GPy) starts outside this chunk; indentation reconstructed.
        self.m = GPy.models.GPRegression(X, y, self.kernel)
        self.m.optimize(messages=True)

    def predict(self, X):
        return self.m.predict(X)


class GPR1:
    """Matern-kernel GaussianProcessRegressor wrapper exposing fit/predict."""

    def __init__(self):
        self.gpr = GaussianProcessRegressor(
            kernel=Matern(nu=2.5), normalize_y=True, n_restarts_optimizer=10
        )

    def fit(self, X, y):
        self.gpr.fit(X, y)

    def predict(self, X):
        # return_std=True -> (mean, std) pair, as the Bayesian optimizer needs.
        return self.gpr.predict(X, return_std=True)


# Default Bayesian optimizer, then the same search with each custom GPR.
opt = Hyperactive(X, y)
opt.search(search_config, n_iter=30, optimizer="Bayesian")

bayes_opt = {"Bayesian": {"gpr": GPR0()}}
opt = Hyperactive(X, y)
opt.search(search_config, n_iter=30, optimizer=bayes_opt)

bayes_opt = {"Bayesian": {"gpr": GPR1()}}
opt = Hyperactive(X, y)
opt.search(search_config, n_iter=30, optimizer=bayes_opt)
def test_memory():
    """Every supported memory setting must run a search without errors."""
    for memory_setting in (True, False, "short", "long", "long"):
        hyper = Hyperactive(X, y, memory=memory_setting)
        hyper.search(search_config)

    hyper = Hyperactive(X, y, memory=memory, verbosity=0)
    hyper.search(search_config)
def model(para, X, y):
    """Mean 5-fold CV accuracy of a KNN classifier for one configuration."""
    classifier = KNeighborsClassifier(n_neighbors=para["n_neighbors"])
    return cross_val_score(classifier, X, y, cv=5).mean()


search_space = {
    "n_neighbors": list(range(1, 100)),
}

# Run once with the optimizer selected by name ...
optimizer = "EvolutionStrategy"
hyper = Hyperactive(X, y)
hyper.add_search(model, search_space, optimizer=optimizer, n_iter=100)
hyper.run()

# ... and once with explicit optimizer-specific parameters.
optimizer = {
    "EvolutionStrategy": {
        "epsilon": 0.1,
        "distribution": "laplace",
        "individuals": 5,
    },
}
hyper = Hyperactive(X, y)
hyper.add_search(model, search_space, optimizer=optimizer, n_iter=100)
hyper.run()
def test_verbosity5():
    """Smoke test: verbosity level 2 with two parallel jobs."""
    hyper = Hyperactive(X, y, verbosity=2, memory=memory)
    hyper.search(search_config, n_jobs=2)
# NOTE(review): fragment — the cnn function's header and the conv layers
# added to `nn` before this point are outside this chunk; indentation
# reconstructed from the collapsed source.
    nn.add(Flatten())
    nn.add(Dense(para["neurons.0"]))
    nn.add(Activation("relu"))
    nn.add(Dropout(0.5))
    nn.add(Dense(10))
    nn.add(Activation("softmax"))

    nn.compile(
        optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]
    )
    nn.fit(X_train, y_train, epochs=25, batch_size=128)

    # Test-set accuracy is the optimization score.
    _, score = nn.evaluate(x=X_test, y=y_test)
    return score


search_space = {
    "conv_layer.0": [conv1, conv2, conv3],
    "filters.0": [16, 32, 64, 128],
    "neurons.0": list(range(100, 1000, 100)),
}

# make numpy array "C-contiguous". This is important for saving meta-data
X_train = np.asarray(X_train, order="C")
y_train = np.asarray(y_train, order="C")

hyper = Hyperactive(X_train, y_train)
hyper.add_search(cnn, search_space, n_iter=5)
hyper.run()
def test_optimizer_args():
    """Optimizer-specific arguments can be passed via a nested dict."""
    hyper = Hyperactive(X, y, memory=memory)
    hyper.search(search_config, optimizer={"HillClimbing": {"epsilon": 0.1}})
def test_checks_max_time():
    """A non-numeric max_time must raise ValueError."""
    with pytest.raises(ValueError):
        hyper = Hyperactive(X, y)
        hyper.search(search_config, max_time="1")