def plot_optimization_history(self, interactive=False):
    """
    Plot parameter optimization history.

    Parameters
    ----------
    interactive : bool, optional
        Create an interactive HTML plot and save it to the current working
        directory. The default is False.

    Returns
    -------
    None.
    """
    self._check_refit_status('plot_optimization_history()')
    validate_plotting_interactive_argument(interactive)

    if interactive:
        from optuna.visualization import plot_optimization_history

        fig = plot_optimization_history(self._study)
        fig.write_html("optimization_history_plot.html")
        try:
            self._display_html("optimization_history_plot.html")
        except Exception as e:
            print(f'Display html error: {e}')
            print(
                f'Optimization History Plot is saved to '
                f'{os.path.join(os.getcwd(), "optimization_history_plot.html")}'
            )
    else:
        from optuna.visualization.matplotlib import plot_optimization_history
        import matplotlib.pyplot as plt

        plot_optimization_history(self._study)
        plt.show()
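# A hedged usage sketch (not from the source); `model` stands for a
# hypothetical, already-refit instance of the class defining the method
# above. Note that the non-interactive branch relies on Optuna's
# experimental matplotlib backend (optuna.visualization.matplotlib).
#
# model.plot_optimization_history()                   # static matplotlib plot
# model.plot_optimization_history(interactive=True)   # writes optimization_history_plot.html to the cwd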
def make_plots(logdir, study):
    logdir = f'{logdir}/plots'
    os.makedirs(logdir, exist_ok=True)
    plot_optimization_history(study).write_image(f'{logdir}/history.svg')
    plot_intermediate_values(study).write_image(f'{logdir}/intermediates.svg')
    plot_parallel_coordinate(study).write_image(f'{logdir}/parallel_coordinates.png')
    plot_slice(study).write_image(f'{logdir}/slices.svg')
    plot_param_importances(study).write_image(f'{logdir}/importances.svg')
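# A minimal usage sketch for make_plots() above (an assumption, not part of
# the source): plotly's Figure.write_image() requires a static-image export
# engine such as the `kaleido` package (`pip install kaleido`).
import optuna

study = optuna.create_study(direction="minimize")
study.optimize(lambda trial: trial.suggest_float("x", -10, 10) ** 2, n_trials=25)
make_plots("logs", study)  # writes history.svg, slices.svg, ... under logs/plots/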
def test_plot_optimization_history_with_error_bar(direction: str) -> None:
    n_studies = 10

    # Test with no trial.
    studies = [create_study(direction=direction) for _ in range(n_studies)]
    figure = plot_optimization_history(studies, error_bar=True)
    assert len(figure.data) == 0

    def objective(trial: Trial) -> float:
        if trial.number == 0:
            return 1.0
        elif trial.number == 1:
            return 2.0
        elif trial.number == 2:
            return 0.0
        return 0.0

    # Test with trials.
    studies = [create_study(direction=direction) for _ in range(n_studies)]
    for study in studies:
        study.optimize(objective, n_trials=3)

    figure = plot_optimization_history(studies, error_bar=True)
    assert len(figure.data) == 4
    assert np.array_equal(figure.data[0].x, [0, 1, 2])
    assert np.array_equal(figure.data[0].y, [1.0, 2.0, 0.0])
    assert np.array_equal(figure.data[1].x, [0, 1, 2])
    ydata = figure.data[1].y
    if direction == "minimize":
        assert np.array_equal(ydata, [1.0, 1.0, 0.0])
    else:
        assert np.array_equal(ydata, [1.0, 2.0, 2.0])

    # Scatters for error bar don't have `name`.
    legend_texts = [scatter.name for scatter in figure.data if scatter.name is not None]
    assert sorted(legend_texts) == ["Best Value", "Objective Value"]
    assert figure.layout.yaxis.title.text == "Objective Value"

    # Test customized target.
    with pytest.warns(UserWarning):
        figure = plot_optimization_history(studies, target=lambda t: t.number, error_bar=True)
    assert len(figure.data) == 1
    assert np.array_equal(figure.data[0].x, [0, 1, 2])
    assert np.array_equal(figure.data[0].y, [0, 1, 2])

    # Test customized target name.
    custom_target_name = "Target Name"
    figure = plot_optimization_history(studies, target_name=custom_target_name, error_bar=True)
    legend_texts = [scatter.name for scatter in figure.data if scatter.name is not None]
    assert sorted(legend_texts) == ["Best Value", custom_target_name]
    assert figure.layout.yaxis.title.text == custom_target_name

    # Ignore failed trials.
    studies = [create_study(direction=direction) for _ in range(n_studies)]
    for study in studies:
        study.optimize(fail_objective, n_trials=1, catch=(ValueError,))

    figure = plot_optimization_history(studies, error_bar=True)
    assert len(figure.data) == 0
def test_plot_optimization_history_with_multiple_studies(direction: str) -> None:
    n_studies = 10

    # Test with no trial.
    studies = [create_study(direction=direction) for _ in range(n_studies)]
    figure = plot_optimization_history(studies)
    assert len(figure.data) == 0

    def objective(trial: Trial) -> float:
        if trial.number == 0:
            return 1.0
        elif trial.number == 1:
            return 2.0
        elif trial.number == 2:
            return 0.0
        return 0.0

    # Test with trials.
    studies = [create_study(direction=direction) for _ in range(n_studies)]
    for study in studies:
        study.optimize(objective, n_trials=3)

    figure = plot_optimization_history(studies)
    assert len(figure.data) == 2 * n_studies
    assert figure.data[0].x == (0, 1, 2)
    assert figure.data[0].y == (1.0, 2.0, 0.0)
    assert figure.data[1].x == (0, 1, 2)
    if direction == "minimize":
        assert np.array_equal(figure.data[1].y, np.array([1.0, 1.0, 0.0]))
    else:
        assert np.array_equal(figure.data[1].y, np.array([1.0, 2.0, 2.0]))
    assert figure.data[0].name == f"Objective Value of {studies[0].study_name}"
    assert figure.layout.yaxis.title.text == "Objective Value"

    # Test customized target.
    with pytest.warns(UserWarning):
        figure = plot_optimization_history(studies, target=lambda t: t.number)
    assert len(figure.data) == 1 * n_studies
    assert np.array_equal(figure.data[0].x, np.array([0, 1, 2], dtype=float))
    assert np.array_equal(figure.data[0].y, np.array([0, 1, 2], dtype=float))

    # Test customized target name.
    figure = plot_optimization_history(studies, target_name="Target Name")
    assert figure.data[0].name == f"Target Name of {studies[0].study_name}"
    assert figure.layout.yaxis.title.text == "Target Name"

    # Ignore failed trials.
    def fail_objective(_: Trial) -> float:
        raise ValueError

    studies = [create_study(direction=direction) for _ in range(n_studies)]
    for study in studies:
        study.optimize(fail_objective, n_trials=1, catch=(ValueError,))

    figure = plot_optimization_history(studies)
    assert len(figure.data) == 0
def test_plot_optimization_history(direction: str) -> None:
    # Test with no trial.
    study = create_study(direction=direction)
    figure = plot_optimization_history(study)
    assert len(figure.data) == 0

    def objective(trial: Trial) -> float:
        if trial.number == 0:
            return 1.0
        elif trial.number == 1:
            return 2.0
        elif trial.number == 2:
            return 0.0
        return 0.0

    # Test with a trial.
    study = create_study(direction=direction)
    study.optimize(objective, n_trials=3)
    figure = plot_optimization_history(study)
    assert len(figure.data) == 2
    assert np.array_equal(figure.data[0].x, [0, 1, 2])
    assert np.array_equal(figure.data[0].y, [1.0, 2.0, 0.0])
    assert np.array_equal(figure.data[1].x, [0, 1, 2])
    ydata = figure.data[1].y
    if direction == "minimize":
        assert np.array_equal(ydata, [1.0, 1.0, 0.0])
    else:
        assert np.array_equal(ydata, [1.0, 2.0, 2.0])
    legend_texts = [x.name for x in figure.data]
    assert legend_texts == ["Objective Value", "Best Value"]
    assert figure.layout.yaxis.title.text == "Objective Value"

    # Test customized target.
    with pytest.warns(UserWarning):
        figure = plot_optimization_history(study, target=lambda t: t.number)
    assert len(figure.data) == 1
    assert np.array_equal(figure.data[0].x, [0, 1, 2])
    assert np.array_equal(figure.data[0].y, [0.0, 1.0, 2.0])

    # Test customized target name.
    custom_target_name = "Target Name"
    figure = plot_optimization_history(study, target_name=custom_target_name)
    legend_texts = [x.name for x in figure.data]
    assert legend_texts == [custom_target_name, "Best Value"]
    assert figure.layout.yaxis.title.text == custom_target_name

    # Ignore failed trials.
    def fail_objective(_: Trial) -> float:
        raise ValueError

    study = create_study(direction=direction)
    study.optimize(fail_objective, n_trials=1, catch=(ValueError,))
    figure = plot_optimization_history(study)
    assert len(figure.data) == 0
def test_plot_optimization_history(direction: str) -> None:
    # Test with no trial.
    study = create_study(direction=direction)
    figure = plot_optimization_history(study)
    assert len(figure.data) == 0

    def objective(trial: Trial) -> float:
        if trial.number == 0:
            return 1.0
        elif trial.number == 1:
            return 2.0
        elif trial.number == 2:
            return 0.0
        return 0.0

    # Test with a trial.
    study = create_study(direction=direction)
    study.optimize(objective, n_trials=3)
    figure = plot_optimization_history(study)
    assert len(figure.data) == 2
    assert figure.data[0].x == (0, 1, 2)
    assert figure.data[0].y == (1.0, 2.0, 0.0)
    assert figure.data[1].x == (0, 1, 2)
    if direction == "minimize":
        assert figure.data[1].y == (1.0, 1.0, 0.0)
    else:
        assert figure.data[1].y == (1.0, 2.0, 2.0)
    assert figure.data[0].name == "Objective Value"
    assert figure.layout.yaxis.title.text == "Objective Value"

    # Test customized target.
    with pytest.warns(UserWarning):
        figure = plot_optimization_history(study, target=lambda t: t.number)
    assert len(figure.data) == 1
    assert figure.data[0].x == (0, 1, 2)
    assert figure.data[0].y == (0, 1, 2)

    # Test customized target name.
    figure = plot_optimization_history(study, target_name="Target Name")
    assert figure.data[0].name == "Target Name"
    assert figure.layout.yaxis.title.text == "Target Name"

    # Ignore failed trials.
    def fail_objective(_: Trial) -> float:
        raise ValueError

    study = create_study(direction=direction)
    study.optimize(fail_objective, n_trials=1, catch=(ValueError,))
    figure = plot_optimization_history(study)
    assert len(figure.data) == 0
def __call__(self, study, trial):
    import optuna.visualization as vis

    self.exp.log_metric('run_score', trial.value)
    self.exp.log_metric('best_so_far_run_score', study.best_value)
    self.exp.log_text('run_parameters', str(trial.params))

    if self.log_study:
        pickle_and_log_artifact(study, 'study.pkl', experiment=self.exp)
    if self.log_optimization_history:
        log_chart(name='optimization_history',
                  chart=vis.plot_optimization_history(study),
                  experiment=self.exp)
    if self.log_contour:
        log_chart(name='contour',
                  chart=vis.plot_contour(study, params=self.params),
                  experiment=self.exp)
    if self.log_parallel_coordinate:
        log_chart(name='parallel_coordinate',
                  chart=vis.plot_parallel_coordinate(study, params=self.params),
                  experiment=self.exp)
    if self.log_slice:
        log_chart(name='slice',
                  chart=vis.plot_slice(study, params=self.params),
                  experiment=self.exp)
def log_study_info(study, experiment=None, log_charts=True, params=None):
    """Logs run results and parameters to Neptune.

    Logs all hyperparameter optimization results to Neptune. Those include the best
    score ('best_score' metric), best parameters ('best_parameters' property), the
    study object itself as an artifact, and interactive optuna charts ('contour',
    'parallel_coordinate', 'slice', 'optimization_history') as artifacts in the
    'charts' subfolder.

    Args:
        study('optuna.study.Study'): Optuna study object after training is completed.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        log_charts('bool'): Whether optuna visualization charts should be logged.
            By default all charts are logged.
        params(`list`): List of parameters to be visualized. Default is all parameters.

    Examples:
        Initialize neptune_monitor::

            import neptune
            import neptunecontrib.monitoring.optuna as opt_utils

            neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')
            neptune.create_experiment(name='optuna sweep')

            neptune_callback = opt_utils.NeptuneCallback()

        Run Optuna training passing monitor as callback::

            ...
            study = optuna.create_study(direction='maximize')
            study.optimize(objective, n_trials=100, callbacks=[neptune_callback])
            opt_utils.log_study_info(study)

        You can explore an example experiment in Neptune:
        https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-1016/artifacts
    """
    import optuna.visualization as vis

    _exp = experiment if experiment else neptune

    _exp.log_metric('best_score', study.best_value)
    _exp.set_property('best_parameters', study.best_params)

    if log_charts:
        log_chart(name='optimization_history',
                  chart=vis.plot_optimization_history(study),
                  experiment=_exp)
        log_chart(name='contour',
                  chart=vis.plot_contour(study, params=params),
                  experiment=_exp)
        log_chart(name='parallel_coordinate',
                  chart=vis.plot_parallel_coordinate(study, params=params),
                  experiment=_exp)
        log_chart(name='slice',
                  chart=vis.plot_slice(study, params=params),
                  experiment=_exp)

    pickle_and_log_artifact(study, 'study.pkl', experiment=_exp)
def test_plot_optimization_history(direction):
    # type: (str) -> None

    # Test with no trial.
    study = create_study(direction=direction)
    figure = plot_optimization_history(study)
    assert len(figure.data) == 0

    def objective(trial):
        # type: (Trial) -> float
        if trial.number == 0:
            return 1.0
        elif trial.number == 1:
            return 2.0
        elif trial.number == 2:
            return 0.0
        return 0.0

    # Test with a trial.
    study = create_study(direction=direction)
    study.optimize(objective, n_trials=3)
    figure = plot_optimization_history(study)
    assert len(figure.data) == 2
    assert figure.data[0].x == (0, 1, 2)
    assert figure.data[0].y == (1.0, 2.0, 0.0)
    assert figure.data[1].x == (0, 1, 2)
    if direction == "minimize":
        assert figure.data[1].y == (1.0, 1.0, 0.0)
    else:
        assert figure.data[1].y == (1.0, 2.0, 2.0)

    # Ignore failed trials.
    def fail_objective(_):
        # type: (Trial) -> float
        raise ValueError

    study = create_study(direction=direction)
    study.optimize(fail_objective, n_trials=1, catch=(ValueError,))
    figure = plot_optimization_history(study)
    assert len(figure.data) == 0
def test_error_bar_in_optimization_history(direction: str) -> None:
    def objective(trial: Trial) -> float:
        return trial.suggest_float("x", 0, 1)

    studies = [create_study(direction=direction) for _ in range(3)]
    suggested_params = [0.1, 0.3, 0.2]
    for x, study in zip(suggested_params, studies):
        study.enqueue_trial({"x": x})
        study.optimize(objective, n_trials=1)

    figure = plot_optimization_history(studies, error_bar=True)

    mean = np.mean(suggested_params)
    std = np.std(suggested_params)
    np.testing.assert_almost_equal(figure.data[0].y, mean)
    np.testing.assert_almost_equal(figure.data[2].y, mean + std)
    np.testing.assert_almost_equal(figure.data[3].y, mean - std)
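# A hedged sketch of what the test above exercises: with error_bar=True,
# plot_optimization_history aggregates a list of studies into a single mean
# curve with a +/- one-standard-deviation band instead of one curve per study.
import optuna
from optuna.visualization import plot_optimization_history

studies = [optuna.create_study(direction="minimize") for _ in range(5)]
for s in studies:
    s.optimize(lambda trial: trial.suggest_float("x", 0, 1) ** 2, n_trials=20)

fig = plot_optimization_history(studies, error_bar=True)  # one aggregated curve
fig.show()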
def draw_results(study):
    # Optimization history
    plt.figure()
    fig = pv.plot_optimization_history(study)
    fig.write_image("./output/opt_his.png")
    plt.close()

    # Contour plot
    plt.figure()
    fig = pv.plot_contour(study)
    fig.write_image("./output/opt_contour.png")
    plt.close()

    # Empirical distribution function (EDF) plot
    plt.figure()
    fig = pv.plot_edf(study)
    fig.write_image("./output/opt_edf.png")
    plt.close()

    # High-dimensional parameter relationships
    plt.figure()
    fig = pv.plot_parallel_coordinate(study)
    fig.write_image("./output/opt_coordinate.png")
    plt.close()
def test_target_is_none_and_study_is_multi_obj() -> None:
    study = create_study(directions=["minimize", "minimize"])
    with pytest.raises(ValueError):
        plot_optimization_history(study)
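# A hedged sketch following from the test above: for a multi-objective study,
# plot_optimization_history raises ValueError unless `target` selects a single
# scalar per trial to plot (the parameter names here are illustrative).
import optuna
from optuna.visualization import plot_optimization_history

study = optuna.create_study(directions=["minimize", "minimize"])
study.optimize(
    lambda trial: (trial.suggest_float("x", 0, 1), trial.suggest_float("y", 0, 1)),
    n_trials=10,
)

fig = plot_optimization_history(
    study,
    target=lambda t: t.values[0],  # plot only the first objective
    target_name="First Objective",
)
fig.show()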
best_model_predict = best_model.predict(x_test)

print('\nBest Model performance at competition:')
print(
    'RMSE: {:.4f} (should be lower than the trivial predictor using the mean MSE: {:.4f})'
    .format(
        math.sqrt(metrics.mean_squared_error(y_test, best_model_predict)),
        math.sqrt(
            metrics.mean_squared_error(
                y_test, [y_test.mean() for i in range(len(y_test))]))))
print(
    'R square: {:.4f} (should be higher than the trivial predictor using the mean: R square {:.4f})'
    .format(
        metrics.r2_score(y_test, best_model_predict),
        metrics.r2_score(y_test, [y_test.mean() for i in range(len(y_test))])))

# 3.8 Final model train
best_model.fit(x, y)
y_comp = [math.exp(i) for i in best_model.predict(x_comp)]
submission = pd.DataFrame(columns=['Id', 'SalePrice'])
submission['Id'] = pd.Series(range(1461, 2920))
submission['SalePrice'] = pd.Series(y_comp)
submission.to_csv('submission.csv', index=False)

# 3.9 Optuna visualization
ov.plot_optimization_history(knn_optuna).show()
# ov.plot_parallel_coordinate(knn_optuna).show()
ov.plot_contour(knn_optuna).show()
ov.plot_slice(knn_optuna).show()
ov.plot_param_importances(knn_optuna).show()
# ov.plot_edf(knn_optuna).show()
def log_study_info(study, experiment=None, log_study=True, log_charts=True,
                   log_optimization_history=False, log_contour=False,
                   log_parallel_coordinate=False, log_slice=False, params=None):
    """Logs run results and parameters to Neptune.

    Logs all hyperparameter optimization results to Neptune. Those include the best
    score ('best_score' metric), best parameters ('best_parameters' property), the
    study object itself as an artifact, and interactive optuna charts ('contour',
    'parallel_coordinate', 'slice', 'optimization_history') as artifacts in the
    'charts' subfolder.

    Args:
        study('optuna.study.Study'): Optuna study object after training is completed.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        log_study('bool'): Whether the optuna study object should be logged as pickle.
            Default is True.
        log_charts('bool'): Deprecated argument. Whether all optuna visualization charts
            should be logged. By default all charts are sent. To not log any charts set
            log_charts=False. If you want to log a particular chart, change the argument
            for that chart explicitly. For example, log_charts=False and log_slice=True
            will log only the slice plot to Neptune.
        log_optimization_history('bool'): Whether the optuna optimization history chart
            should be logged. Default is True.
        log_contour('bool'): Whether the optuna contour plot should be logged.
            Default is True.
        log_parallel_coordinate('bool'): Whether the optuna parallel coordinate plot
            should be logged. Default is True.
        log_slice('bool'): Whether the optuna slice chart should be logged.
            Default is True.
        params(`list`): List of parameters to be visualized. Default is all parameters.

    Examples:
        Initialize neptune_monitor::

            import neptune
            import neptunecontrib.monitoring.optuna as opt_utils

            neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')
            neptune.create_experiment(name='optuna sweep')

            neptune_callback = opt_utils.NeptuneCallback()

        Run Optuna training passing monitor as callback::

            ...
            study = optuna.create_study(direction='maximize')
            study.optimize(objective, n_trials=100, callbacks=[neptune_callback])
            opt_utils.log_study_info(study)

        You can explore an example experiment in Neptune:
        https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-1016/artifacts
    """
    import optuna.visualization as vis

    _exp = experiment if experiment else neptune

    _exp.log_metric('best_score', study.best_value)
    _exp.set_property('best_parameters', study.best_params)

    if log_charts:
        message = """log_charts argument is deprecated and will be removed in future releases.
        Please use log_optimization_history, log_contour, log_parallel_coordinate, log_slice
        arguments explicitly.
        """
        warnings.warn(message)

        log_optimization_history = True
        log_contour = True
        log_parallel_coordinate = True
        log_slice = True

    if log_study:
        pickle_and_log_artifact(study, 'study.pkl', experiment=_exp)
    if log_optimization_history:
        log_chart(name='optimization_history',
                  chart=vis.plot_optimization_history(study),
                  experiment=_exp)
    if log_contour:
        log_chart(name='contour',
                  chart=vis.plot_contour(study, params=params),
                  experiment=_exp)
    if log_parallel_coordinate:
        log_chart(name='parallel_coordinate',
                  chart=vis.plot_parallel_coordinate(study, params=params),
                  experiment=_exp)
    if log_slice:
        log_chart(name='slice',
                  chart=vis.plot_slice(study, params=params),
                  experiment=_exp)
def hyperparameters_optimization(self) -> None:
    if self.verbose > 0:
        print("Optimizing hyperparameters")

    if self.storage is not None and self.study_name is None:
        warnings.warn(
            f"You passed a remote storage: {self.storage} but no `--study-name`."
            "The study name will be generated by Optuna, make sure to re-use the same study name "
            "when you want to do distributed hyperparameter optimization.")

    if self.tensorboard_log is not None:
        warnings.warn("Tensorboard log is deactivated when running hyperparameter optimization")
        self.tensorboard_log = None

    # TODO: eval each hyperparams several times to account for noisy evaluation
    sampler = self._create_sampler(self.sampler)
    pruner = self._create_pruner(self.pruner)

    if self.verbose > 0:
        print(f"Sampler: {self.sampler} - Pruner: {self.pruner}")

    study = optuna.create_study(
        sampler=sampler,
        pruner=pruner,
        storage=self.storage,
        study_name=self.study_name,
        load_if_exists=True,
        direction="maximize",
    )

    try:
        study.optimize(self.objective, n_trials=self.n_trials, n_jobs=self.n_jobs)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))

    print("Best trial:")
    trial = study.best_trial

    print("Value: ", trial.value)

    print("Params: ")
    for key, value in trial.params.items():
        print(f"  {key}: {value}")

    report_name = (
        f"report_{self.env_id}_{self.n_trials}-trials-{self.n_timesteps}"
        f"-{self.sampler}-{self.pruner}_{int(time.time())}")

    log_path = os.path.join(self.log_folder, self.algo, report_name)

    if self.verbose:
        print(f"Writing report to {log_path}")

    # Write report
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    study.trials_dataframe().to_csv(f"{log_path}.csv")

    # Save python object to inspect/re-use it later
    with open(f"{log_path}.pkl", "wb+") as f:
        pkl.dump(study, f)

    # Skip plots
    if self.no_optim_plots:
        return

    # Plot optimization result
    try:
        fig1 = plot_optimization_history(study)
        fig2 = plot_param_importances(study)

        fig1.show()
        fig2.show()
    except (ValueError, ImportError, RuntimeError):
        pass
def show_history(self):
    return visualization.plot_optimization_history(self.study)
def ml_mlp_mul_ms(station_name="종로구"):
    print("Start Multivariate MLP Mean Seasonality Decomposition (MSE) Model")
    targets = ["PM10", "PM25"]
    # targets = ["SO2", "CO", "O3", "NO2", "PM10", "PM25",
    #            "temp", "u", "v", "pres", "humid", "prep", "snow"]

    # 24 * 14 = 336
    # sample_size = 336
    sample_size = 48
    output_size = 24

    # If you want to debug, set fast_dev_run = True and n_trials to a small number.
    fast_dev_run = False
    n_trials = 128
    # fast_dev_run = True
    # n_trials = 1

    # Hyperparameters
    epoch_size = 500
    batch_size = 64
    learning_rate = 1e-3

    # Blocked Cross Validation
    # neglect small overlap between train_dates and valid_dates
    # 11y = ((2y, 0.5y), (2y, 0.5y), (2y, 0.5y), (2.5y, 1y))
    train_dates = [
        (dt.datetime(2008, 1, 4, 1).astimezone(SEOULTZ),
         dt.datetime(2009, 12, 31, 23).astimezone(SEOULTZ)),
        (dt.datetime(2010, 7, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2012, 6, 30, 23).astimezone(SEOULTZ)),
        (dt.datetime(2013, 1, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2014, 12, 31, 23).astimezone(SEOULTZ)),
        (dt.datetime(2015, 7, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2017, 12, 31, 23).astimezone(SEOULTZ))]
    valid_dates = [
        (dt.datetime(2010, 1, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2010, 6, 30, 23).astimezone(SEOULTZ)),
        (dt.datetime(2012, 7, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2012, 12, 31, 23).astimezone(SEOULTZ)),
        (dt.datetime(2015, 1, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2015, 6, 30, 23).astimezone(SEOULTZ)),
        (dt.datetime(2018, 1, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2018, 12, 31, 23).astimezone(SEOULTZ))]
    train_valid_fdate = dt.datetime(2008, 1, 3, 1).astimezone(SEOULTZ)
    train_valid_tdate = dt.datetime(2018, 12, 31, 23).astimezone(SEOULTZ)

    # Debug
    if fast_dev_run:
        train_dates = [
            (dt.datetime(2015, 7, 1, 0).astimezone(SEOULTZ),
             dt.datetime(2017, 12, 31, 23).astimezone(SEOULTZ))]
        valid_dates = [
            (dt.datetime(2018, 1, 1, 0).astimezone(SEOULTZ),
             dt.datetime(2018, 12, 31, 23).astimezone(SEOULTZ))]
        train_valid_fdate = dt.datetime(2015, 7, 1, 0).astimezone(SEOULTZ)
        train_valid_tdate = dt.datetime(2018, 12, 31, 23).astimezone(SEOULTZ)

    test_fdate = dt.datetime(2019, 1, 1, 0).astimezone(SEOULTZ)
    test_tdate = dt.datetime(2020, 10, 31, 23).astimezone(SEOULTZ)

    # Check date range assumptions.
    assert len(train_dates) == len(valid_dates)
    for i, (td, vd) in enumerate(zip(train_dates, valid_dates)):
        assert vd[0] > td[1]
    assert test_fdate > train_dates[-1][1]
    assert test_fdate > valid_dates[-1][1]

    train_features = [
        "SO2", "CO", "NO2", "PM10", "PM25",
        "temp", "wind_spd", "wind_cdir", "wind_sdir",
        "pres", "humid", "prep"]
    train_features_periodic = [
        "SO2", "CO", "NO2", "PM10", "PM25",
        "temp", "wind_spd", "wind_cdir", "wind_sdir", "pres", "humid"]
    train_features_nonperiodic = ["prep"]

    for target in targets:
        print("Training " + target + "...")
        output_dir = Path(f"/mnt/data/MLPMSMultivariate/{station_name}/{target}/")
        Path.mkdir(output_dir, parents=True, exist_ok=True)
        model_dir = output_dir / "models"
        Path.mkdir(model_dir, parents=True, exist_ok=True)
        log_dir = output_dir / "log"
        Path.mkdir(log_dir, parents=True, exist_ok=True)

        _df_h = data.load_imputed(HOURLY_DATA_PATH)
        df_h = _df_h.query('stationCode == "' +
                           str(SEOUL_STATIONS[station_name]) + '"')

        if station_name == '종로구' and \
           not Path("/input/python/input_jongno_imputed_hourly_pandas.csv").is_file():
            # Save the imputed result.
            df_h.to_csv("/input/python/input_jongno_imputed_hourly_pandas.csv")

        # Construct dataset for seasonality.
        print("Construct Train/Validation Sets...", flush=True)
        train_valid_dataset = construct_dataset(train_valid_fdate,
                                                train_valid_tdate,
                                                filepath=HOURLY_DATA_PATH,
                                                station_name=station_name,
                                                target=target,
                                                sample_size=sample_size,
                                                output_size=output_size,
                                                transform=False)
        # Compute seasonality.
        train_valid_dataset.preprocess()

        # For Blocked Cross Validation:
        # load datasets over the given date ranges and transform them using the
        # scalers from train_valid_dataset. All datasets are saved in tuples.
        print("Construct Training Sets...", flush=True)
        train_datasets = tuple(
            construct_dataset(td[0], td[1],
                              scaler_X=train_valid_dataset.scaler_X,
                              scaler_Y=train_valid_dataset.scaler_Y,
                              filepath=HOURLY_DATA_PATH,
                              station_name=station_name,
                              target=target,
                              sample_size=sample_size,
                              output_size=output_size,
                              features=train_features,
                              features_periodic=train_features_periodic,
                              features_nonperiodic=train_features_nonperiodic,
                              transform=True) for td in train_dates)

        print("Construct Validation Sets...", flush=True)
        valid_datasets = tuple(
            construct_dataset(vd[0], vd[1],
                              scaler_X=train_valid_dataset.scaler_X,
                              scaler_Y=train_valid_dataset.scaler_Y,
                              filepath=HOURLY_DATA_PATH,
                              station_name=station_name,
                              target=target,
                              sample_size=sample_size,
                              output_size=output_size,
                              features=train_features,
                              features_periodic=train_features_periodic,
                              features_nonperiodic=train_features_nonperiodic,
                              transform=True) for vd in valid_dates)

        # Just a single test set.
        print("Construct Test Sets...", flush=True)
        test_dataset = construct_dataset(test_fdate, test_tdate,
                                         scaler_X=train_valid_dataset.scaler_X,
                                         scaler_Y=train_valid_dataset.scaler_Y,
                                         filepath=HOURLY_DATA_PATH,
                                         station_name=station_name,
                                         target=target,
                                         sample_size=sample_size,
                                         output_size=output_size,
                                         features=train_features,
                                         features_periodic=train_features_periodic,
                                         features_nonperiodic=train_features_nonperiodic,
                                         transform=True)

        # Convert tuples of datasets to ConcatDataset.
        train_dataset = ConcatDataset(train_datasets)
        val_dataset = ConcatDataset(valid_datasets)

        # num_layers == number of hidden layers
        hparams = Namespace(num_layers=1,
                            layer_size=128,
                            learning_rate=learning_rate,
                            batch_size=batch_size)

        def objective(trial):
            model = BaseMLPModel(trial=trial,
                                 hparams=hparams,
                                 input_size=sample_size * len(train_features),
                                 sample_size=sample_size,
                                 output_size=output_size,
                                 station_name=station_name,
                                 target=target,
                                 features=train_features,
                                 features_periodic=train_features_periodic,
                                 features_nonperiodic=train_features_nonperiodic,
                                 train_dataset=train_dataset,
                                 val_dataset=val_dataset,
                                 test_dataset=test_dataset,
                                 scaler_X=train_valid_dataset.scaler_X,
                                 scaler_Y=train_valid_dataset.scaler_Y,
                                 output_dir=output_dir)

            # Most basic trainer; uses good defaults.
            trainer = Trainer(gpus=1 if torch.cuda.is_available() else None,
                              precision=32,
                              min_epochs=1,
                              max_epochs=20,
                              default_root_dir=output_dir,
                              fast_dev_run=fast_dev_run,
                              logger=True,
                              checkpoint_callback=False,
                              callbacks=[PyTorchLightningPruningCallback(
                                  trial, monitor="valid/MSE")])
            trainer.fit(model)

            # Don't log
            # hyperparameters = model.hparams
            # trainer.logger.log_hyperparams(hyperparameters)

            return trainer.callback_metrics.get("valid/MSE")

        if n_trials > 1:
            study = optuna.create_study(direction="minimize")
            study.enqueue_trial({'sigma': 1.3, 'num_layers': 4, 'layer_size': 8,
                                 'learning_rate': learning_rate, 'batch_size': batch_size})
            study.enqueue_trial({'sigma': 1.3, 'num_layers': 4, 'layer_size': 32,
                                 'learning_rate': learning_rate, 'batch_size': batch_size})
            study.enqueue_trial({'sigma': 1.3, 'num_layers': 4, 'layer_size': 64,
                                 'learning_rate': learning_rate, 'batch_size': batch_size})
            study.enqueue_trial({'sigma': 1.3, 'num_layers': 4, 'layer_size': 32,
                                 'learning_rate': learning_rate, 'batch_size': batch_size})
            study.enqueue_trial({'sigma': 1.3, 'num_layers': 8, 'layer_size': 32,
                                 'learning_rate': learning_rate, 'batch_size': batch_size})
            study.enqueue_trial({'sigma': 1.3, 'num_layers': 12, 'layer_size': 32,
                                 'learning_rate': learning_rate, 'batch_size': batch_size})
            study.enqueue_trial({'sigma': 0.7, 'num_layers': 4, 'layer_size': 32,
                                 'learning_rate': learning_rate, 'batch_size': batch_size})
            study.enqueue_trial({'sigma': 2.0, 'num_layers': 4, 'layer_size': 32,
                                 'learning_rate': learning_rate, 'batch_size': batch_size})

            # timeout = 3600 * 36 = 36 h
            study.optimize(objective, n_trials=n_trials, timeout=3600 * 36)

            trial = study.best_trial

            print("  Value: ", trial.value)
            print("  Params: ")
            for key, value in trial.params.items():
                print("    {}: {}".format(key, value))
            print("sample_size : ", sample_size)
            print("output_size : ", output_size)

            # Plot optimization results.
            fig_cont1 = optv.plot_contour(study, params=['num_layers', 'layer_size'])
            fig_cont1.write_image(str(output_dir / "contour_num_layers_layer_size.png"))
            fig_cont1.write_image(str(output_dir / "contour_num_layers_layer_size.svg"))

            fig_edf = optv.plot_edf(study)
            fig_edf.write_image(str(output_dir / "edf.png"))
            fig_edf.write_image(str(output_dir / "edf.svg"))

            fig_iv = optv.plot_intermediate_values(study)
            fig_iv.write_image(str(output_dir / "intermediate_values.png"))
            fig_iv.write_image(str(output_dir / "intermediate_values.svg"))

            fig_his = optv.plot_optimization_history(study)
            fig_his.write_image(str(output_dir / "opt_history.png"))
            fig_his.write_image(str(output_dir / "opt_history.svg"))

            fig_pcoord = optv.plot_parallel_coordinate(study, params=['num_layers', 'layer_size'])
            fig_pcoord.write_image(str(output_dir / "parallel_coord.png"))
            fig_pcoord.write_image(str(output_dir / "parallel_coord.svg"))

            fig_slice = optv.plot_slice(study, params=['num_layers', 'layer_size'])
            fig_slice.write_image(str(output_dir / "slice.png"))
            fig_slice.write_image(str(output_dir / "slice.svg"))

            # Set hparams with the optimized values.
            hparams.num_layers = trial.params['num_layers']
            hparams.layer_size = trial.params['layer_size']

        dict_hparams = copy.copy(vars(hparams))
        dict_hparams["sample_size"] = sample_size
        dict_hparams["output_size"] = output_size
        with open(output_dir / 'hparams.json', 'w') as f:
            print(dict_hparams, file=f)
        with open(output_dir / 'hparams.csv', 'w') as f:
            print(pd.DataFrame.from_dict(dict_hparams, orient='index'), file=f)

        model = BaseMLPModel(hparams=hparams,
                             input_size=sample_size * len(train_features),
                             sample_size=sample_size,
                             output_size=output_size,
                             station_name=station_name,
                             target=target,
                             features=train_features,
                             features_periodic=train_features_periodic,
                             features_nonperiodic=train_features_nonperiodic,
                             train_dataset=train_dataset,
                             val_dataset=val_dataset,
                             test_dataset=test_dataset,
                             scaler_X=train_valid_dataset.scaler_X,
                             scaler_Y=train_valid_dataset.scaler_Y,
                             output_dir=output_dir)

        # Record inputs.
        for i, _train_set in enumerate(train_datasets):
            _train_set.to_csv(model.data_dir /
                              ("df_trainset_{0}_".format(str(i).zfill(2)) + target + ".csv"))
        for i, _valid_set in enumerate(valid_datasets):
            _valid_set.to_csv(model.data_dir /
                              ("df_validset_{0}_".format(str(i).zfill(2)) + target + ".csv"))
        train_valid_dataset.to_csv(model.data_dir / ("df_trainvalidset_" + target + ".csv"))
        test_dataset.to_csv(model.data_dir / ("df_testset_" + target + ".csv"))

        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            os.path.join(model_dir, "train_{epoch}_{valid/MSE:.2f}"),
            monitor="valid/MSE",
            period=10)
        early_stop_callback = EarlyStopping(monitor='valid/MSE',
                                            min_delta=0.001,
                                            patience=30,
                                            verbose=True,
                                            mode='min')

        log_version = dt.date.today().strftime("%y%m%d-%H-%M")
        loggers = [TensorBoardLogger(log_dir, version=log_version),
                   CSVLogger(log_dir, version=log_version)]

        # Most basic trainer; uses good defaults.
        trainer = Trainer(gpus=1 if torch.cuda.is_available() else None,
                          precision=32,
                          min_epochs=1,
                          max_epochs=epoch_size,
                          default_root_dir=output_dir,
                          fast_dev_run=fast_dev_run,
                          logger=loggers,
                          log_every_n_steps=5,
                          flush_logs_every_n_steps=10,
                          callbacks=[early_stop_callback],
                          checkpoint_callback=checkpoint_callback)

        trainer.fit(model)

        # Run the test set.
        trainer.test(ckpt_path=None)

        shutil.rmtree(model_dir)
    # Handle pruning based on the intermediate value.
    if trial.should_prune():
        raise optuna.TrialPruned()

    return value


if __name__ == "__main__":
    study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=100, timeout=600)

    # Visualize the optimization history.
    plot_optimization_history(study).show()

    # Visualize the learning curves of the trials.
    plot_intermediate_values(study).show()

    # Visualize high-dimensional parameter relationships.
    plot_parallel_coordinate(study).show()

    # Select parameters to visualize.
    plot_parallel_coordinate(study, params=["lr_init", "n_units_l0"]).show()

    # Visualize hyperparameter relationships.
    plot_contour(study).show()

    # Select parameters to visualize.
    plot_contour(study, params=["n_units_l0", "n_units_l1"]).show()
    return accuracy


###################################################################################################
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=SEED),
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10),
)
study.optimize(objective, n_trials=100, timeout=600)

###################################################################################################
# Plot functions
# --------------
# Visualize the optimization history. See :func:`~optuna.visualization.plot_optimization_history` for the details.
plot_optimization_history(study)

###################################################################################################
# Visualize the learning curves of the trials. See :func:`~optuna.visualization.plot_intermediate_values` for the details.
plot_intermediate_values(study)

###################################################################################################
# Visualize high-dimensional parameter relationships. See :func:`~optuna.visualization.plot_parallel_coordinate` for the details.
plot_parallel_coordinate(study)

###################################################################################################
# Select parameters to visualize.
plot_parallel_coordinate(study, params=["bagging_freq", "bagging_fraction"])

###################################################################################################
# Visualize hyperparameter relationships. See :func:`~optuna.visualization.plot_contour` for the details.
def test_plot_optimization_history_with_multiple_studies(direction: str) -> None:
    n_studies = 10

    # Test with no trial.
    studies = [create_study(direction=direction) for _ in range(n_studies)]
    figure = plot_optimization_history(studies)
    assert len(figure.data) == 0

    def objective(trial: Trial) -> float:
        if trial.number == 0:
            return 1.0
        elif trial.number == 1:
            return 2.0
        elif trial.number == 2:
            return 0.0
        return 0.0

    # Test with trials.
    studies = [create_study(direction=direction) for _ in range(n_studies)]
    for study in studies:
        study.optimize(objective, n_trials=3)

    figure = plot_optimization_history(studies)
    assert len(figure.data) == 2 * n_studies
    assert np.array_equal(figure.data[0].x, [0, 1, 2])
    assert np.array_equal(figure.data[0].y, [1.0, 2.0, 0.0])
    assert np.array_equal(figure.data[1].x, [0, 1, 2])
    ydata = figure.data[1].y
    if direction == "minimize":
        assert np.array_equal(ydata, [1.0, 1.0, 0.0])
    else:
        assert np.array_equal(ydata, [1.0, 2.0, 2.0])

    expected_legend_texts = []
    for i in range(n_studies):
        expected_legend_texts.append(f"Best Value of {studies[i].study_name}")
        expected_legend_texts.append(f"Objective Value of {studies[i].study_name}")
    legend_texts = [scatter.name for scatter in figure.data]
    assert sorted(legend_texts) == sorted(expected_legend_texts)
    assert figure.layout.yaxis.title.text == "Objective Value"

    # Test customized target.
    with pytest.warns(UserWarning):
        figure = plot_optimization_history(studies, target=lambda t: t.number)
    assert len(figure.data) == 1 * n_studies
    assert np.array_equal(figure.data[0].x, [0, 1, 2])
    assert np.array_equal(figure.data[0].y, [0, 1, 2])

    # Test customized target name.
    custom_target_name = "Target Name"
    figure = plot_optimization_history(studies, target_name=custom_target_name)
    expected_legend_texts = []
    for i in range(n_studies):
        expected_legend_texts.append(f"Best Value of {studies[i].study_name}")
        expected_legend_texts.append(f"{custom_target_name} of {studies[i].study_name}")
    legend_texts = [scatter.name for scatter in figure.data]
    assert sorted(legend_texts) == sorted(expected_legend_texts)
    assert figure.layout.yaxis.title.text == custom_target_name

    # Ignore failed trials.
    def fail_objective(_: Trial) -> float:
        raise ValueError

    studies = [create_study(direction=direction) for _ in range(n_studies)]
    for study in studies:
        study.optimize(fail_objective, n_trials=1, catch=(ValueError,))

    figure = plot_optimization_history(studies)
    assert len(figure.data) == 0