def test_plot_slice_log_scale() -> None:
    """Log-distributed params get a log-scaled axis; linear ones stay linear."""
    study = create_study()
    study.add_trial(
        create_trial(
            value=0.0,
            params={"x_linear": 1.0, "y_log": 1e-3},
            distributions={
                "x_linear": UniformDistribution(0.0, 3.0),
                "y_log": LogUniformDistribution(1e-5, 1.0),
            },
        )
    )

    # Single-parameter plots: axis type follows the parameter's distribution.
    fig = plot_slice(study, params=["y_log"])
    assert fig.layout["xaxis_type"] == "log"
    fig = plot_slice(study, params=["x_linear"])
    assert fig.layout["xaxis_type"] is None

    # Multi-parameter plot: each subplot keeps its own axis type.
    fig = plot_slice(study)
    assert fig.layout["xaxis_type"] is None
    assert fig.layout["xaxis2_type"] == "log"
def make_plots(logdir, study):
    """Write the standard set of Optuna charts for *study* under ``<logdir>/plots``."""
    logdir = f'{logdir}/plots'
    os.makedirs(logdir, exist_ok=True)
    # Pair each chart builder with its output path, then render them all the same way.
    charts = [
        (plot_optimization_history, f'{logdir}/history.svg'),
        (plot_intermediate_values, f'{logdir}/intermediates.svg'),
        (plot_parallel_coordinate, f'{logdir}/parallel_coordinates.png'),
        (plot_slice, f'{logdir}/slices.svg'),
        (plot_param_importances, f'{logdir}/importances.svg'),
    ]
    for build_chart, image_path in charts:
        build_chart(study).write_image(image_path)
def test_color_map(direction: str) -> None: study = prepare_study_with_trials(with_c_d=False, direction=direction) # Since `plot_slice`'s colormap depends on only trial.number, `reversecale` is not in the plot. marker = plot_slice(study).data[0]["marker"] assert COLOR_SCALE == [v[1] for v in marker["colorscale"]] assert "reversecale" not in marker
def __call__(self, study, trial):
    """Optuna callback body: log per-trial metrics to Neptune and, depending on
    the flags configured on this callback, the study pickle and optuna charts."""
    import optuna.visualization as vis

    # Per-trial scores and parameters.
    self.exp.log_metric('run_score', trial.value)
    self.exp.log_metric('best_so_far_run_score', study.best_value)
    self.exp.log_text('run_parameters', str(trial.params))

    # Optional artifacts, each gated by a flag set at callback construction.
    if self.log_study:
        pickle_and_log_artifact(study, 'study.pkl', experiment=self.exp)
    if self.log_optimization_history:
        log_chart(name='optimization_history',
                  chart=vis.plot_optimization_history(study),
                  experiment=self.exp)
    if self.log_contour:
        log_chart(name='contour',
                  chart=vis.plot_contour(study, params=self.params),
                  experiment=self.exp)
    if self.log_parallel_coordinate:
        log_chart(name='parallel_coordinate',
                  chart=vis.plot_parallel_coordinate(study, params=self.params),
                  experiment=self.exp)
    if self.log_slice:
        log_chart(name='slice',
                  chart=vis.plot_slice(study, params=self.params),
                  experiment=self.exp)
def log_study_info(study, experiment=None, log_charts=True, params=None):
    """Logs runs results and parameters to neptune.

    Logs all hyperparameter optimization results to Neptune. Those include best score
    ('best_score' metric), best parameters ('best_parameters' property), the study
    object itself as artifact, and interactive optuna charts
    ('contour', 'parallel_coordinate', 'slice', 'optimization_history') as artifacts
    in 'charts' sub folder.

    Args:
        study('optuna.study.Study'): Optuna study object after training is completed.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        log_charts('bool'): Whether optuna visualization charts should be logged.
            By default all charts are logged.
        params(`list`): List of parameters to be visualized. Default is all parameters.

    Examples:
        Initialize neptune_monitor::

            import neptune
            import neptunecontrib.monitoring.optuna as opt_utils

            neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')
            neptune.create_experiment(name='optuna sweep')

            neptune_callback = opt_utils.NeptuneCallback()

        Run Optuna training passing monitor as callback::

            ...
            study = optuna.create_study(direction='maximize')
            study.optimize(objective, n_trials=100, callbacks=[neptune_callback])
            opt_utils.log_study_info(study)

        You can explore an example experiment in Neptune:
        https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-1016/artifacts
    """
    import optuna.visualization as vis

    # Fall back to the global `neptune` session when no experiment is supplied.
    _exp = experiment if experiment else neptune

    _exp.log_metric('best_score', study.best_value)
    _exp.set_property('best_parameters', study.best_params)

    if log_charts:
        log_chart(name='optimization_history',
                  chart=vis.plot_optimization_history(study), experiment=_exp)
        log_chart(name='contour',
                  chart=vis.plot_contour(study, params=params), experiment=_exp)
        log_chart(name='parallel_coordinate',
                  chart=vis.plot_parallel_coordinate(study, params=params), experiment=_exp)
        log_chart(name='slice',
                  chart=vis.plot_slice(study, params=params), experiment=_exp)

    # Always archive the study object itself.
    pickle_and_log_artifact(study, 'study.pkl', experiment=_exp)
def test_plot_slice() -> None:
    """Exercise plot_slice over empty, populated, filtered, and failed studies."""
    # No trials: an empty figure is produced.
    study = prepare_study_with_trials(no_trials=True)
    fig = plot_slice(study)
    assert len(fig.data) == 0

    study = prepare_study_with_trials(with_c_d=False)

    # All parameters plotted: one trace per parameter, fixture coordinates.
    fig = plot_slice(study)
    assert len(fig.data) == 2
    assert fig.data[0]["x"] == (1.0, 2.5)
    assert fig.data[0]["y"] == (0.0, 1.0)
    assert fig.data[1]["x"] == (2.0, 0.0, 1.0)
    assert fig.data[1]["y"] == (0.0, 2.0, 1.0)
    assert fig.layout.yaxis.title.text == "Objective Value"

    # Restricting to a single parameter keeps only its trace.
    fig = plot_slice(study, params=["param_a"])
    assert len(fig.data) == 1
    assert fig.data[0]["x"] == (1.0, 2.5)
    assert fig.data[0]["y"] == (0.0, 1.0)

    # A customized target warns but is honored; axis label stays the default.
    with pytest.warns(UserWarning):
        fig = plot_slice(study, params=["param_a"], target=lambda t: t.params["param_b"])
    assert len(fig.data) == 1
    assert fig.data[0]["x"] == (1.0, 2.5)
    assert fig.data[0]["y"] == (2.0, 1.0)
    assert fig.layout.yaxis.title.text == "Objective Value"

    # A customized target name replaces the y-axis label.
    fig = plot_slice(study, target_name="Target Name")
    assert fig.layout.yaxis.title.text == "Target Name"

    # Unknown parameter names are rejected.
    with pytest.raises(ValueError):
        plot_slice(study, params=["optuna"])

    # Failed trials are ignored.
    def fail_objective(_: Trial) -> float:
        raise ValueError

    study = create_study()
    study.optimize(fail_objective, n_trials=1, catch=(ValueError,))
    fig = plot_slice(study)
    assert len(fig.data) == 0
def test_color_map(direction: str) -> None:
    # Populate a study with three deterministic trials so the slice plot has markers.
    study = create_study(direction=direction)
    for i in range(3):
        study.add_trial(
            create_trial(
                value=float(i),
                params={"param_a": float(i), "param_b": float(i)},
                distributions={
                    "param_a": FloatDistribution(0.0, 3.0),
                    "param_b": FloatDistribution(0.0, 3.0),
                },
            )
        )

    # Since `plot_slice`'s colormap depends on only trial.number, `reversecale` is not in the plot.
    # NOTE(review): "reversecale" looks like a typo for plotly's "reversescale";
    # as written the membership check may be vacuous -- confirm intent.
    marker = plot_slice(study).data[0]["marker"]
    assert COLOR_SCALE == [v[1] for v in marker["colorscale"]]
    assert "reversecale" not in marker
def test_plot_slice():
    # type: () -> None
    # No trials: plot_slice yields an empty figure.
    study = prepare_study_with_trials(no_trials=True)
    figure = plot_slice(study)
    assert len(figure.data) == 0

    study = prepare_study_with_trials(with_c_d=False)

    # One trace per parameter, with the fixture's known coordinates.
    figure = plot_slice(study)
    assert len(figure.data) == 2
    assert figure.data[0]["x"] == (1.0, 2.5)
    assert figure.data[0]["y"] == (0.0, 1.0)
    assert figure.data[1]["x"] == (2.0, 0.0, 1.0)
    assert figure.data[1]["y"] == (0.0, 2.0, 1.0)

    # Selecting a single parameter keeps only its trace.
    figure = plot_slice(study, params=["param_a"])
    assert len(figure.data) == 1
    assert figure.data[0]["x"] == (1.0, 2.5)
    assert figure.data[0]["y"] == (0.0, 1.0)

    # Unknown parameter names are rejected.
    with pytest.raises(ValueError):
        plot_slice(study, params=["optuna"])

    # Failed trials are ignored entirely by the plot.
    def fail_objective(_):
        # type: (Trial) -> float
        raise ValueError

    study = create_study()
    study.optimize(fail_objective, n_trials=1, catch=(ValueError, ))
    figure = plot_slice(study)
    assert len(figure.data) == 0
# Visualize high-dimensional parameter relationships. See :func:`~optuna.visualization.plot_parallel_coordinate` for the details. plot_parallel_coordinate(study) ################################################################################################### # Select parameters to visualize. plot_parallel_coordinate(study, params=["bagging_freq", "bagging_fraction"]) ################################################################################################### # Visualize hyperparameter relationships. See :func:`~optuna.visualization.plot_contour` for the details. plot_contour(study) ################################################################################################### # Select parameters to visualize. plot_contour(study, params=["bagging_freq", "bagging_fraction"]) ################################################################################################### # Visualize individual hyperparameters as slice plot. See :func:`~optuna.visualization.plot_slice` for the details. plot_slice(study) ################################################################################################### # Select parameters to visualize. plot_slice(study, params=["bagging_freq", "bagging_fraction"]) ################################################################################################### # Visualize parameter importances. See :func:`~optuna.visualization.plot_param_importances` for the details. plot_param_importances(study) ################################################################################################### # Visualize empirical distribution function. See :func:`~optuna.visualization.plot_edf` for the details. plot_edf(study)
if __name__ == "__main__":
    # Maximize the objective; MedianPruner stops unpromising trials early.
    study = optuna.create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=100, timeout=600)

    # Visualize the optimization history.
    plot_optimization_history(study).show()

    # Visualize the learning curves of the trials.
    plot_intermediate_values(study).show()

    # Visualize high-dimensional parameter relationships.
    plot_parallel_coordinate(study).show()

    # Select parameters to visualize.
    plot_parallel_coordinate(study, params=["lr_init", "n_units_l0"]).show()

    # Visualize hyperparameter relationships.
    plot_contour(study).show()

    # Select parameters to visualize.
    plot_contour(study, params=["n_units_l0", "n_units_l1"]).show()

    # Visualize individual hyperparameters.
    plot_slice(study).show()

    # Select parameters to visualize.
    plot_slice(study, params=["n_units_l0", "n_units_l1"]).show()
def test_plot_slice() -> None:
    # Test with no trial.
    study = create_study(direction="minimize")
    figure = plot_slice(study)
    assert len(figure.data) == 0

    # Three deterministic trials; the second omits "param_a" entirely.
    study = create_study(direction="minimize")
    study.add_trial(
        create_trial(
            value=0.0,
            params={"param_a": 1.0, "param_b": 2.0},
            distributions={
                "param_a": FloatDistribution(0.0, 3.0),
                "param_b": FloatDistribution(0.0, 3.0),
            },
        )
    )
    study.add_trial(
        create_trial(
            value=2.0,
            params={"param_b": 0.0},
            distributions={"param_b": FloatDistribution(0.0, 3.0)},
        )
    )
    study.add_trial(
        create_trial(
            value=1.0,
            params={"param_a": 2.5, "param_b": 1.0},
            distributions={
                "param_a": FloatDistribution(0.0, 3.0),
                "param_b": FloatDistribution(0.0, 3.0),
            },
        )
    )

    # Test with a trial.
    figure = plot_slice(study)
    assert len(figure.data) == 2
    assert figure.data[0]["x"] == (1.0, 2.5)
    assert figure.data[0]["y"] == (0.0, 1.0)
    assert figure.data[1]["x"] == (2.0, 0.0, 1.0)
    assert figure.data[1]["y"] == (0.0, 2.0, 1.0)
    assert figure.layout.yaxis.title.text == "Objective Value"

    # Test with a trial to select parameter.
    figure = plot_slice(study, params=["param_a"])
    assert len(figure.data) == 1
    assert figure.data[0]["x"] == (1.0, 2.5)
    assert figure.data[0]["y"] == (0.0, 1.0)

    # Test with a customized target value (warns because the study objective is ignored).
    with pytest.warns(UserWarning):
        figure = plot_slice(study, params=["param_a"], target=lambda t: t.params["param_b"])
    assert len(figure.data) == 1
    assert figure.data[0]["x"] == (1.0, 2.5)
    assert figure.data[0]["y"] == (2.0, 1.0)
    assert figure.layout.yaxis.title.text == "Objective Value"

    # Test with a customized target name.
    figure = plot_slice(study, target_name="Target Name")
    assert figure.layout.yaxis.title.text == "Target Name"

    # Test with wrong parameters.
    with pytest.raises(ValueError):
        plot_slice(study, params=["optuna"])

    # Ignore failed trials.
    study = create_study()
    study.optimize(fail_objective, n_trials=1, catch=(ValueError,))
    figure = plot_slice(study)
    assert len(figure.data) == 0
def test_target_is_none_and_study_is_multi_obj() -> None:
    """Without an explicit target, plot_slice must reject multi-objective studies."""
    multi_obj_study = create_study(directions=["minimize", "minimize"])
    with pytest.raises(ValueError):
        plot_slice(multi_obj_study)
def ml_mlp_mul_ms(station_name="종로구"):
    """Train a multivariate MLP (mean-seasonality-decomposition) model per target.

    For each target pollutant: build blocked-cross-validation datasets, tune
    hyperparameters with Optuna, then train a final model and run the test set.
    NOTE(review): line boundaries in this block were reconstructed from a
    collapsed source; exact whitespace inside some printed strings is assumed.
    """
    print("Start Multivariate MLP Mean Seasonality Decomposition (MSE) Model")
    targets = ["PM10", "PM25"]
    # targets = ["SO2", "CO", "O3", "NO2", "PM10", "PM25",
    #            "temp", "u", "v", "pres", "humid", "prep", "snow"]
    # 24*14 = 336
    # sample_size = 336
    sample_size = 48
    output_size = 24
    # If you want to debug, fast_dev_run = True and n_trials should be small number
    fast_dev_run = False
    n_trials = 128
    # fast_dev_run = True
    # n_trials = 1

    # Hyper parameter
    epoch_size = 500
    batch_size = 64
    learning_rate = 1e-3

    # Blocked Cross Validation
    # neglect small overlap between train_dates and valid_dates
    # 11y = ((2y, 0.5y), (2y, 0.5y), (2y, 0.5y), (2.5y, 1y))
    train_dates = [
        (dt.datetime(2008, 1, 4, 1).astimezone(SEOULTZ),
         dt.datetime(2009, 12, 31, 23).astimezone(SEOULTZ)),
        (dt.datetime(2010, 7, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2012, 6, 30, 23).astimezone(SEOULTZ)),
        (dt.datetime(2013, 1, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2014, 12, 31, 23).astimezone(SEOULTZ)),
        (dt.datetime(2015, 7, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2017, 12, 31, 23).astimezone(SEOULTZ))]
    valid_dates = [
        (dt.datetime(2010, 1, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2010, 6, 30, 23).astimezone(SEOULTZ)),
        (dt.datetime(2012, 7, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2012, 12, 31, 23).astimezone(SEOULTZ)),
        (dt.datetime(2015, 1, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2015, 6, 30, 23).astimezone(SEOULTZ)),
        (dt.datetime(2018, 1, 1, 0).astimezone(SEOULTZ),
         dt.datetime(2018, 12, 31, 23).astimezone(SEOULTZ))]
    train_valid_fdate = dt.datetime(2008, 1, 3, 1).astimezone(SEOULTZ)
    train_valid_tdate = dt.datetime(2018, 12, 31, 23).astimezone(SEOULTZ)

    # Debug: shrink the CV splits to a single fold.
    if fast_dev_run:
        train_dates = [
            (dt.datetime(2015, 7, 1, 0).astimezone(SEOULTZ),
             dt.datetime(2017, 12, 31, 23).astimezone(SEOULTZ))]
        valid_dates = [
            (dt.datetime(2018, 1, 1, 0).astimezone(SEOULTZ),
             dt.datetime(2018, 12, 31, 23).astimezone(SEOULTZ))]
        train_valid_fdate = dt.datetime(2015, 7, 1, 0).astimezone(SEOULTZ)
        train_valid_tdate = dt.datetime(2018, 12, 31, 23).astimezone(SEOULTZ)

    test_fdate = dt.datetime(2019, 1, 1, 0).astimezone(SEOULTZ)
    test_tdate = dt.datetime(2020, 10, 31, 23).astimezone(SEOULTZ)

    # check date range assumption
    assert len(train_dates) == len(valid_dates)
    for i, (td, vd) in enumerate(zip(train_dates, valid_dates)):
        assert vd[0] > td[1]
    assert test_fdate > train_dates[-1][1]
    assert test_fdate > valid_dates[-1][1]

    train_features = [
        "SO2", "CO", "NO2", "PM10", "PM25", "temp", "wind_spd", "wind_cdir",
        "wind_sdir", "pres", "humid", "prep"
    ]
    train_features_periodic = [
        "SO2", "CO", "NO2", "PM10", "PM25", "temp", "wind_spd", "wind_cdir",
        "wind_sdir", "pres", "humid"
    ]
    train_features_nonperiodic = ["prep"]

    for target in targets:
        print("Training " + target + "...")
        # Output layout: <output_dir>/{models,log}
        output_dir = Path(
            f"/mnt/data/MLPMSMultivariate/{station_name}/{target}/")
        Path.mkdir(output_dir, parents=True, exist_ok=True)
        model_dir = output_dir / "models"
        Path.mkdir(model_dir, parents=True, exist_ok=True)
        log_dir = output_dir / "log"
        Path.mkdir(log_dir, parents=True, exist_ok=True)

        _df_h = data.load_imputed(HOURLY_DATA_PATH)
        df_h = _df_h.query('stationCode == "' +
                           str(SEOUL_STATIONS[station_name]) + '"')

        if station_name == '종로구' and \
                not Path("/input/python/input_jongno_imputed_hourly_pandas.csv").is_file():
            # load imputed result
            df_h.to_csv("/input/python/input_jongno_imputed_hourly_pandas.csv")

        # construct dataset for seasonality
        print("Construct Train/Validation Sets...", flush=True)
        train_valid_dataset = construct_dataset(train_valid_fdate,
                                                train_valid_tdate,
                                                filepath=HOURLY_DATA_PATH,
                                                station_name=station_name,
                                                target=target,
                                                sample_size=sample_size,
                                                output_size=output_size,
                                                transform=False)
        # compute seasonality
        train_valid_dataset.preprocess()

        # For Block Cross Validation..
        # load dataset in given range dates and transform using scaler from train_valid_set
        # all dataset are saved in tuple
        print("Construct Training Sets...", flush=True)
        train_datasets = tuple(
            construct_dataset(td[0], td[1],
                              scaler_X=train_valid_dataset.scaler_X,
                              scaler_Y=train_valid_dataset.scaler_Y,
                              filepath=HOURLY_DATA_PATH,
                              station_name=station_name,
                              target=target,
                              sample_size=sample_size,
                              output_size=output_size,
                              features=train_features,
                              features_periodic=train_features_periodic,
                              features_nonperiodic=train_features_nonperiodic,
                              transform=True) for td in train_dates)

        print("Construct Validation Sets...", flush=True)
        valid_datasets = tuple(
            construct_dataset(vd[0], vd[1],
                              scaler_X=train_valid_dataset.scaler_X,
                              scaler_Y=train_valid_dataset.scaler_Y,
                              filepath=HOURLY_DATA_PATH,
                              station_name=station_name,
                              target=target,
                              sample_size=sample_size,
                              output_size=output_size,
                              features=train_features,
                              features_periodic=train_features_periodic,
                              features_nonperiodic=train_features_nonperiodic,
                              transform=True) for vd in valid_dates)

        # just single test set
        print("Construct Test Sets...", flush=True)
        test_dataset = construct_dataset(
            test_fdate,
            test_tdate,
            scaler_X=train_valid_dataset.scaler_X,
            scaler_Y=train_valid_dataset.scaler_Y,
            filepath=HOURLY_DATA_PATH,
            station_name=station_name,
            target=target,
            sample_size=sample_size,
            output_size=output_size,
            features=train_features,
            features_periodic=train_features_periodic,
            features_nonperiodic=train_features_nonperiodic,
            transform=True)

        # convert tuple of datasets to ConcatDataset
        train_dataset = ConcatDataset(train_datasets)
        val_dataset = ConcatDataset(valid_datasets)

        # num_layer == number of hidden layer
        hparams = Namespace(num_layers=1,
                            layer_size=128,
                            learning_rate=learning_rate,
                            batch_size=batch_size)

        def objective(trial):
            # Optuna objective: train a candidate model for a few epochs
            # and report validation MSE (pruned via the Lightning callback).
            model = BaseMLPModel(
                trial=trial,
                hparams=hparams,
                input_size=sample_size * len(train_features),
                sample_size=sample_size,
                output_size=output_size,
                station_name=station_name,
                target=target,
                features=train_features,
                features_periodic=train_features_periodic,
                features_nonperiodic=train_features_nonperiodic,
                train_dataset=train_dataset,
                val_dataset=val_dataset,
                test_dataset=test_dataset,
                scaler_X=train_valid_dataset.scaler_X,
                scaler_Y=train_valid_dataset.scaler_Y,
                output_dir=output_dir)

            # most basic trainer, uses good defaults
            trainer = Trainer(gpus=1 if torch.cuda.is_available() else None,
                              precision=32,
                              min_epochs=1,
                              max_epochs=20,
                              default_root_dir=output_dir,
                              fast_dev_run=fast_dev_run,
                              logger=True,
                              checkpoint_callback=False,
                              callbacks=[
                                  PyTorchLightningPruningCallback(
                                      trial, monitor="valid/MSE")
                              ])

            trainer.fit(model)

            # Don't Log
            # hyperparameters = model.hparams
            # trainer.logger.log_hyperparams(hyperparameters)

            return trainer.callback_metrics.get("valid/MSE")

        if n_trials > 1:
            study = optuna.create_study(direction="minimize")
            # Warm-start the search with hand-picked configurations.
            study.enqueue_trial({
                'sigma': 1.3, 'num_layers': 4, 'layer_size': 8,
                'learning_rate': learning_rate, 'batch_size': batch_size
            })
            study.enqueue_trial({
                'sigma': 1.3, 'num_layers': 4, 'layer_size': 32,
                'learning_rate': learning_rate, 'batch_size': batch_size
            })
            study.enqueue_trial({
                'sigma': 1.3, 'num_layers': 4, 'layer_size': 64,
                'learning_rate': learning_rate, 'batch_size': batch_size
            })
            study.enqueue_trial({
                'sigma': 1.3, 'num_layers': 4, 'layer_size': 32,
                'learning_rate': learning_rate, 'batch_size': batch_size
            })
            study.enqueue_trial({
                'sigma': 1.3, 'num_layers': 8, 'layer_size': 32,
                'learning_rate': learning_rate, 'batch_size': batch_size
            })
            study.enqueue_trial({
                'sigma': 1.3, 'num_layers': 12, 'layer_size': 32,
                'learning_rate': learning_rate, 'batch_size': batch_size
            })
            study.enqueue_trial({
                'sigma': 0.7, 'num_layers': 4, 'layer_size': 32,
                'learning_rate': learning_rate, 'batch_size': batch_size
            })
            study.enqueue_trial({
                'sigma': 2.0, 'num_layers': 4, 'layer_size': 32,
                'learning_rate': learning_rate, 'batch_size': batch_size
            })
            # timeout = 3600*36 = 36h
            study.optimize(objective, n_trials=n_trials, timeout=3600 * 36)

            trial = study.best_trial

            print("  Value: ", trial.value)
            print("  Params: ")
            for key, value in trial.params.items():
                print("    {}: {}".format(key, value))
            print("sample_size : ", sample_size)
            print("output_size : ", output_size)

            # plot optmization results
            fig_cont1 = optv.plot_contour(study, params=['num_layers', 'layer_size'])
            fig_cont1.write_image(
                str(output_dir / "contour_num_layers_layer_size.png"))
            fig_cont1.write_image(
                str(output_dir / "contour_num_layers_layer_size.svg"))

            fig_edf = optv.plot_edf(study)
            fig_edf.write_image(str(output_dir / "edf.png"))
            fig_edf.write_image(str(output_dir / "edf.svg"))

            fig_iv = optv.plot_intermediate_values(study)
            fig_iv.write_image(str(output_dir / "intermediate_values.png"))
            fig_iv.write_image(str(output_dir / "intermediate_values.svg"))

            fig_his = optv.plot_optimization_history(study)
            fig_his.write_image(str(output_dir / "opt_history.png"))
            fig_his.write_image(str(output_dir / "opt_history.svg"))

            fig_pcoord = optv.plot_parallel_coordinate(
                study, params=['num_layers', 'layer_size'])
            fig_pcoord.write_image(str(output_dir / "parallel_coord.png"))
            fig_pcoord.write_image(str(output_dir / "parallel_coord.svg"))

            fig_slice = optv.plot_slice(study, params=['num_layers', 'layer_size'])
            fig_slice.write_image(str(output_dir / "slice.png"))
            fig_slice.write_image(str(output_dir / "slice.svg"))

            # set hparams with optmized value
            hparams.num_layers = trial.params['num_layers']
            hparams.layer_size = trial.params['layer_size']

        # Persist the (possibly tuned) hyperparameters for reproducibility.
        dict_hparams = copy.copy(vars(hparams))
        dict_hparams["sample_size"] = sample_size
        dict_hparams["output_size"] = output_size
        with open(output_dir / 'hparams.json', 'w') as f:
            print(dict_hparams, file=f)
        with open(output_dir / 'hparams.csv', 'w') as f:
            print(pd.DataFrame.from_dict(dict_hparams, orient='index'), file=f)

        # Final model, built with the tuned hyperparameters (no trial object).
        model = BaseMLPModel(hparams=hparams,
                             input_size=sample_size * len(train_features),
                             sample_size=sample_size,
                             output_size=output_size,
                             station_name=station_name,
                             target=target,
                             features=train_features,
                             features_periodic=train_features_periodic,
                             features_nonperiodic=train_features_nonperiodic,
                             train_dataset=train_dataset,
                             val_dataset=val_dataset,
                             test_dataset=test_dataset,
                             scaler_X=train_valid_dataset.scaler_X,
                             scaler_Y=train_valid_dataset.scaler_Y,
                             output_dir=output_dir)

        # record input
        for i, _train_set in enumerate(train_datasets):
            _train_set.to_csv(
                model.data_dir /
                ("df_trainset_{0}_".format(str(i).zfill(2)) + target + ".csv"))
        for i, _valid_set in enumerate(valid_datasets):
            _valid_set.to_csv(
                model.data_dir /
                ("df_validset_{0}_".format(str(i).zfill(2)) + target + ".csv"))
        train_valid_dataset.to_csv(model.data_dir /
                                   ("df_trainvalidset_" + target + ".csv"))
        test_dataset.to_csv(model.data_dir / ("df_testset_" + target + ".csv"))

        checkpoint_callback = pl.callbacks.ModelCheckpoint(
            os.path.join(model_dir, "train_{epoch}_{valid/MSE:.2f}"),
            monitor="valid/MSE",
            period=10)

        early_stop_callback = EarlyStopping(monitor='valid/MSE',
                                            min_delta=0.001,
                                            patience=30,
                                            verbose=True,
                                            mode='min')

        log_version = dt.date.today().strftime("%y%m%d-%H-%M")
        loggers = [ \
            TensorBoardLogger(log_dir, version=log_version),
            CSVLogger(log_dir, version=log_version)]

        # most basic trainer, uses good defaults
        trainer = Trainer(gpus=1 if torch.cuda.is_available() else None,
                          precision=32,
                          min_epochs=1,
                          max_epochs=epoch_size,
                          default_root_dir=output_dir,
                          fast_dev_run=fast_dev_run,
                          logger=loggers,
                          log_every_n_steps=5,
                          flush_logs_every_n_steps=10,
                          callbacks=[early_stop_callback],
                          checkpoint_callback=checkpoint_callback)

        trainer.fit(model)

        # run test set
        trainer.test(ckpt_path=None)

        # Checkpoints are no longer needed after testing.
        shutil.rmtree(model_dir)
def show_params(self, params=None):
    """Return an Optuna slice plot for this object's study.

    Args:
        params: Optional list of parameter names to plot; all parameters when None.
    """
    return visualization.plot_slice(self.study, params=params)
def log_study_info(study, experiment=None, log_study=True, log_charts=True,
                   log_optimization_history=False, log_contour=False,
                   log_parallel_coordinate=False, log_slice=False, params=None):
    """Logs runs results and parameters to neptune.

    Logs all hyperparameter optimization results to Neptune. Those include best score
    ('best_score' metric), best parameters ('best_parameters' property), the study
    object itself as artifact, and interactive optuna charts
    ('contour', 'parallel_coordinate', 'slice', 'optimization_history') as artifacts
    in 'charts' sub folder.

    Args:
        study('optuna.study.Study'): Optuna study object after training is completed.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        log_study('bool'): Whether optuna study object should be logged as pickle. Default is True.
        log_charts('bool'): Deprecated argument. Whether all optuna visualizations charts should be logged.
            By default all charts are sent.
            To not log any charts set log_charts=False.
            If you want to log a particular chart change the argument for that chart explicitly.
            For example log_charts=False and log_slice=True will log only the slice plot to Neptune.
        log_optimization_history('bool'): Whether optuna optimization history chart should be logged.
            Default is False (forced to True while the deprecated log_charts=True default is in effect).
        log_contour('bool'): Whether optuna contour plot should be logged.
            Default is False (forced to True while the deprecated log_charts=True default is in effect).
        log_parallel_coordinate('bool'): Whether optuna parallel coordinate plot should be logged.
            Default is False (forced to True while the deprecated log_charts=True default is in effect).
        log_slice('bool'): Whether optuna slice chart should be logged.
            Default is False (forced to True while the deprecated log_charts=True default is in effect).
        params(`list`): List of parameters to be visualized. Default is all parameters.

    Examples:
        Initialize neptune_monitor::

            import neptune
            import neptunecontrib.monitoring.optuna as opt_utils

            neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')
            neptune.create_experiment(name='optuna sweep')

            neptune_callback = opt_utils.NeptuneCallback()

        Run Optuna training passing monitor as callback::

            ...
            study = optuna.create_study(direction='maximize')
            study.optimize(objective, n_trials=100, callbacks=[neptune_callback])
            opt_utils.log_study_info(study)

        You can explore an example experiment in Neptune:
        https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-1016/artifacts
    """
    import optuna.visualization as vis

    # Fall back to the global `neptune` session when no experiment is supplied.
    _exp = experiment if experiment else neptune

    _exp.log_metric('best_score', study.best_value)
    _exp.set_property('best_parameters', study.best_params)

    if log_charts:
        # Fixed typo in the user-facing warning ("depraceted" -> "deprecated").
        message = """log_charts argument is deprecated and will be removed in future releases.
Please use log_optimization_history, log_contour, log_parallel_coordinate, log_slice, arguments explicitly.
"""
        warnings.warn(message)

        # Deprecated switch turns every individual chart on.
        log_optimization_history = True
        log_contour = True
        log_parallel_coordinate = True
        log_slice = True

    if log_study:
        pickle_and_log_artifact(study, 'study.pkl', experiment=_exp)
    if log_optimization_history:
        log_chart(name='optimization_history',
                  chart=vis.plot_optimization_history(study), experiment=_exp)
    if log_contour:
        log_chart(name='contour',
                  chart=vis.plot_contour(study, params=params), experiment=_exp)
    if log_parallel_coordinate:
        log_chart(name='parallel_coordinate',
                  chart=vis.plot_parallel_coordinate(study, params=params), experiment=_exp)
    if log_slice:
        log_chart(name='slice',
                  chart=vis.plot_slice(study, params=params), experiment=_exp)
# Evaluate the tuned model on the held-out test split against a mean-predicting baseline.
best_model_predict = best_model.predict(x_test)

print('\nBest Model performance at competition:')
print(
    'RMSE: {:.4f} (should be lower than the trivial predictor using the mean MSE: {:.4f})'
    .format(
        math.sqrt(metrics.mean_squared_error(y_test, best_model_predict)),
        math.sqrt(
            metrics.mean_squared_error(
                y_test, [y_test.mean() for i in range(len(y_test))]))))
print(
    'R square: {:.4f} (should be higher than the trivial predictor using the mean: R square {:.4f})'
    .format(
        metrics.r2_score(y_test, best_model_predict),
        metrics.r2_score(y_test,
                         [y_test.mean() for i in range(len(y_test))])))

#3.8 Final model train
# Refit on the full training data; predictions are exp-transformed
# (math.exp) back from log scale before writing the submission file.
best_model.fit(x, y)
y_comp = [math.exp(i) for i in best_model.predict(x_comp)]
submission = pd.DataFrame(columns=['Id', 'SalePrice'])
submission['Id'] = pd.Series(range(1461, 2920))
submission['SalePrice'] = pd.Series(y_comp)
submission.to_csv('submission.csv', index=False)

#3.9 Optuna visualization
ov.plot_optimization_history(knn_optuna).show()
#ov.plot_parallel_coordinate(knn_optuna).show()
ov.plot_contour(knn_optuna).show()
ov.plot_slice(knn_optuna).show()
ov.plot_param_importances(knn_optuna).show()
#ov.plot_edf(knn_optuna).show()