def test_get_base_storage(storage_specifier):
    """DaskStorage.get_base_storage() should hand back the same storage type
    Optuna itself would construct for the given URL."""
    with Client():
        with get_storage_url(storage_specifier) as url:
            wrapped = dask_optuna.DaskStorage(url)
            base = wrapped.get_base_storage()
            # Compare against the class optuna would instantiate for this URL
            assert isinstance(base, type(optuna.storages.get_storage(url)))
def test_optuna_joblib_backend(storage_specifier, processes):
    """Optimizing through the Dask joblib backend records every trial."""
    n_trials = 10
    with Client(processes=processes), get_storage_url(storage_specifier) as url:
        study = optuna.create_study(storage=dask_optuna.DaskStorage(url))
        with joblib.parallel_backend("dask"):
            study.optimize(objective, n_trials=n_trials, n_jobs=-1)
        assert len(study.trials) == n_trials
async def test_in_memory(c, s, a, b):
    """Five concurrent _optimize tasks sharing an in-memory storage should all
    contribute trials to a single study (5 tasks x 2 trials = 10)."""
    storage = None  # None -> DaskStorage wraps Optuna's in-memory storage
    dask_storage = dask_optuna.DaskStorage(storage=storage)
    futures = [c.submit(_optimize, storage=storage, pure=False) for _ in range(5)]
    # Fix: gather ALL futures instead of wait()-ing and then awaiting only
    # futures[0] — the original pattern silently swallowed exceptions raised
    # by tasks 1-4; Client.gather re-raises a failure from any of them.
    await c.gather(futures)
    results = await dask_storage.get_all_study_summaries()
    assert len(results) == 1
    assert results[0].n_trials == 10
async def test_sqlite(c, s, a, b):
    """Same as the in-memory case but backed by a temporary SQLite database:
    five tasks of two trials each must land in one study with 10 trials."""
    with tempfile.TemporaryDirectory() as tmpdirname:
        storage = "sqlite:///" + os.path.join(tmpdirname, "example.db")
        dask_storage = dask_optuna.DaskStorage(storage=storage)
        futures = [c.submit(_optimize, storage=storage, pure=False) for _ in range(5)]
        # Fix: gather ALL futures instead of wait()-ing and then awaiting only
        # futures[0] — the original pattern silently swallowed exceptions
        # raised by tasks 1-4; Client.gather re-raises a failure from any.
        await c.gather(futures)
        results = await dask_storage.get_all_study_summaries()
        assert len(results) == 1
        assert results[0].n_trials == 10
def test_study_direction_best_value(processes, direction):
    # Regression test for https://github.com/jrbourbeau/dask-optuna/issues/15
    pytest.importorskip("pandas")
    with Client(processes=processes):
        storage = dask_optuna.DaskStorage()
        study = optuna.create_study(storage=storage, direction=direction)
        with joblib.parallel_backend("dask"):
            study.optimize(objective, n_trials=10, n_jobs=-1)
        # study.best_value must agree with the extreme of the trials DataFrame
        values = study.trials_dataframe()["value"]
        expected = values.max() if direction == "maximize" else values.min()
        np.testing.assert_allclose(expected, study.best_value)
    # NOTE(review): fragment — the enclosing `objective(trial)` definition
    # starts before this chunk; only the tail of its body is visible here.
    # DART booster hyperparameters, sampled log-uniformly over (1e-8, 1.0]
    param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
    param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)
    # Train the booster and score on the held-out split; accuracy is the
    # objective value (maximized below)
    bst = xgb.train(param, dtrain)
    preds = bst.predict(dtest)
    pred_labels = np.rint(preds)  # round predicted probabilities -> 0/1 labels
    accuracy = sklearn.metrics.accuracy_score(y_test, pred_labels)
    return accuracy


if __name__ == "__main__":
    # Spin up a 5-worker Coiled cluster and route Optuna's joblib work to it
    with coiled.Cluster(n_workers=5, configuration="jrbourbeau/optuna") as cluster:
        with Client(cluster) as client:
            print(f"Dask dashboard is available at {client.dashboard_link}")
            client.wait_for_workers(5)
            # SQLite-backed Optuna storage, proxied through the Dask scheduler
            storage = dask_optuna.DaskStorage("sqlite:///coiled-example.db")
            study = optuna.create_study(storage=storage, direction="maximize")
            with joblib.parallel_backend("dask"):
                study.optimize(objective, n_trials=100, n_jobs=-1)
            print("Best params:")
            pprint(study.best_params)
"""
Example to demonstrate using Dask-Optuna with Optuna's Joblib internals to run
optimization trials on a Dask cluster in parallel.
"""
import optuna
import joblib
from dask.distributed import Client

import dask_optuna

optuna.logging.set_verbosity(optuna.logging.WARN)


def objective(trial):
    """Simple quadratic objective with its minimum at x = 2."""
    # Fix: suggest_float replaces trial.suggest_uniform, which is deprecated
    # and removed in Optuna 3.0; sampling behavior is identical.
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2


if __name__ == "__main__":
    with Client() as client:
        print(f"Dask dashboard is available at {client.dashboard_link}")
        # DaskStorage lives on the scheduler so all workers share one study
        dask_storage = dask_optuna.DaskStorage()
        study = optuna.create_study(storage=dask_storage)
        with joblib.parallel_backend("dask"):
            study.optimize(objective, n_trials=500, n_jobs=-1)
        print(f"best_params = {study.best_params}")
c = Client(cluster) # Query the client for all connected workers workers = c.has_what().keys() n_workers = len(workers) df = cudf.read_csv(os.path.join(data_dir, "train.csv")) N_TRIALS = 5 # Drop non-numerical data and fill NaNs before passing to cuML RF CAT_COLS = list(df.select_dtypes('object').columns) df = df.drop(CAT_COLS, axis=1) df = df.fillna(0) df = df.astype("float32") X, y = df.drop(["target"], axis=1), df["target"].astype('int32') study_name = "dask_optuna_lr_log_loss_tpe" storage_name = "sqlite:///study_stores.db" storage = dask_optuna.DaskStorage(storage_name) study = optuna.create_study(sampler=optuna.samplers.TPESampler(), study_name=study_name, direction="minimize", storage=storage) # Optimize in parallel on your Dask cluster with parallel_backend("dask"): study.optimize(lambda trial: objective(trial, X, y), n_trials=N_TRIALS, n_jobs=n_workers) print('Best params{} and best score{}'.format(study.best_params, study.best_value))
def _optimize(storage):
    """Run two trials against the shared "foo" study using *storage*."""
    backend = dask_optuna.DaskStorage(storage=storage)
    # load_if_exists lets many concurrent callers append to one study
    study = optuna.create_study(study_name="foo", storage=backend, load_if_exists=True)
    study.optimize(objective, n_trials=2)