def make_study(restart=False):
    ''' Make a study, deleting one if it already exists '''
    try:
        if restart:
            print(f'About to delete {storage}:{name}, you have 5 seconds to intervene!')
            sc.timedsleep(5.0)
            op.delete_study(storage=storage, study_name=name)
    except:
        pass
    output = op.create_study(storage=storage, study_name=name, load_if_exists=not restart)
    return output

def fix_broken_study(_study: optuna.study.Study, name: str, storage: str, direction: str,
                     sampler: optuna.samplers.BaseSampler):
    """
    Remove broken trials, i.e. trials that failed to complete one epoch before slurm
    (or something else) killed the job and returned NaN or None. Failing to remove these
    trials leads to an error when Optuna tries to update its parameters, because such
    trials only have "NoneType" data associated with them, whereas numerical data
    (e.g. the loss value) is needed to update the parameters.
    """
    if len(_study.trials) == 0:
        return _study, []

    trials = []
    removed = []
    for trial in _study.trials:
        if len(trial.intermediate_values) == 0:
            trials.append(trial)
            continue
        step, intermediate_value = max(trial.intermediate_values.items())
        if intermediate_value is not None:
            trials.append(trial)
        else:
            removed.append(trial.number + 1)

    if len(removed) == 0:
        return _study, []

    # Delete the current study
    optuna.delete_study(study_name=name, storage=storage)

    # Create a new one in its place
    if isinstance(direction, str):
        study_fixed = optuna.create_study(study_name=name, storage=storage, direction=direction,
                                          sampler=sampler, load_if_exists=False)
    else:
        study_fixed = optuna.multi_objective.create_study(study_name=name, storage=storage,
                                                          directions=direction, sampler=sampler,
                                                          load_if_exists=False)

    # Add the working trials to the new study
    for trial in trials:
        study_fixed.add_trial(trial)

    return study_fixed, removed

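# Hedged usage sketch (not part of the original snippet): how fix_broken_study might be
# called when resuming a study after crashed jobs. The storage path, study name, and
# example_objective below are illustrative assumptions, not names from the source.
import optuna

def example_objective(trial):
    # Toy objective used only to make the sketch runnable.
    x = trial.suggest_float('x', -10.0, 10.0)
    return (x - 2.0) ** 2

if __name__ == '__main__':
    storage = 'sqlite:///example_optuna.db'   # assumed storage URL
    name = 'example_study'                    # assumed study name
    sampler = optuna.samplers.TPESampler(seed=0)

    study = optuna.create_study(study_name=name, storage=storage, sampler=sampler,
                                direction='minimize', load_if_exists=True)
    # Drop trials whose last intermediate value is None before resuming optimization.
    study, removed = fix_broken_study(study, name, storage, direction='minimize', sampler=sampler)
    if removed:
        print(f'Removed broken trials: {removed}')
    study.optimize(example_objective, n_trials=20)
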
def calcula_resultados(self) -> List[Resultado]:
    """
    Returns, for each fold, its corresponding result.
    """
    self._resultados = []
    self.arr_validacao_por_fold = []  # validation experiments per fold

    # seed to keep the experiments reproducible
    np.random.seed(1)

    ## For each fold
    for i, fold in enumerate(self.folds):
        ## 1. If an optimization method is provided, use it to obtain the best method
        if self.ClasseObjetivoOtimizacao is not None:
            try:
                if not self.load_if_exists:
                    optuna.delete_study(
                        study_name=f"{self.nom_experimento}_fold_{i}",
                        storage='sqlite:///resultados/optuna_studies.db')
            except KeyError:
                pass
            study = optuna.create_study(
                study_name=f"{self.nom_experimento}_fold_{i}",
                sampler=self.sampler,
                direction="maximize",
                storage='sqlite:///resultados/optuna_studies.db',
                load_if_exists=self.load_if_exists)

            objetivo_otimizacao = self.ClasseObjetivoOtimizacao(fold, self.preproc_method)
            study.optimize(objetivo_otimizacao, self.num_trials)

            # get the best method from the optimization
            best_method = objetivo_otimizacao.arr_evaluated_methods[study.best_trial.number]
            self.studies_per_fold.append(study)
        else:
            # otherwise, the method stored as an attribute of the Experimento class (unmodified) is used
            best_method = self.ClasseObjetivoOtimizacao.ml_method_default

        ## 2. append to the results the result predicted using the best method
        resultado = best_method.eval(self.preproc_method, fold.df_treino,
                                     fold.df_data_to_predict, fold.col_classe)
        print(resultado.macro_f1)
        self._resultados.append(resultado)
    return self._resultados

def test_delete_study(storage_mode: str) -> None:
    with StorageSupplier(storage_mode) as storage:
        # Test deleting a non-existing study.
        with pytest.raises(KeyError):
            delete_study(study_name="invalid-study-name", storage=storage)

        # Test deleting an existing study.
        study = create_study(storage=storage, load_if_exists=False)
        delete_study(study_name=study.study_name, storage=storage)

        # Test that deleting an already-deleted study fails.
        with pytest.raises(KeyError):
            delete_study(study_name=study.study_name, storage=storage)

def test_delete_study(storage_mode: str) -> None:
    with StorageSupplier(storage_mode) as storage:
        # Get a storage object because delete_study does not accept None.
        storage = get_storage(storage=storage)
        assert storage is not None

        # Test deleting a non-existing study.
        with pytest.raises(KeyError):
            delete_study("invalid-study-name", storage)

        # Test deleting an existing study.
        study = create_study(storage=storage, load_if_exists=False)
        delete_study(study.study_name, storage)

        # Test that deleting an already-deleted study fails.
        with pytest.raises(KeyError):
            delete_study(study.study_name, storage)

def test_delete_study(storage_mode, cache_mode):
    # type: (str, bool) -> None

    with StorageSupplier(storage_mode, cache_mode) as storage:
        # Get a storage object because delete_study does not accept None.
        storage = optuna.storages.get_storage(storage=storage)
        assert storage is not None

        # Test deleting a non-existing study.
        with pytest.raises(ValueError):
            optuna.delete_study("invalid-study-name", storage)

        # Test deleting an existing study.
        study = optuna.create_study(storage=storage, load_if_exists=False)
        optuna.delete_study(study.study_name, storage)

        # Test that deleting an already-deleted study fails.
        if not isinstance(study._storage, optuna.storages.InMemoryStorage):
            # Skip `InMemoryStorage` because it just internally initializes trials and so on.
            with pytest.raises(ValueError):
                optuna.delete_study(study.study_name, storage)

def optimize(self) -> TuningResult:
    """
    Method performs a hyperparameter optimization run according to the selected HPO-method.
    :return: result: TuningResult
        TuningResult-object that contains the results of this optimization run.
    """

    # Select the specified HPO-tuning method
    if self.hpo_method == 'CMA-ES':
        this_optimizer = CmaEsSampler(seed=self.random_seed)
    elif self.hpo_method == 'TPE':
        this_optimizer = TPESampler(seed=self.random_seed)
    elif self.hpo_method == 'RandomSearch':
        this_optimizer = RandomSampler(seed=self.random_seed)
    else:
        raise Exception('Unknown HPO-method!')

    # Create a study object and specify the optimization direction
    study_name = 'hpo_study'
    study_storage = 'sqlite:///hpo.db'

    # Optimize on the predefined n_func_evals and measure the wall clock times
    # start_time = time.time()
    self.times = []  # Initialize a list for saving the wall clock times

    # Delete old study objects ('fresh start') >> otherwise the old results will be included
    try:
        optuna.delete_study(study_name, study_storage)
    except:
        print('No old optuna study objects found!')

    # Use a warmstart configuration?
    if self.do_warmstart == 'Yes':
        try:
            # Create a new study
            warmstart_study = optuna.create_study(direction='minimize', storage=study_storage,
                                                  study_name=study_name, load_if_exists=False)

            # Retrieve the warmstart hyperparameters for the ML-algorithm
            warmstart_params = self.get_warmstart_configuration()

            # Initialize a dictionary for the warmstart HP-configuration
            warmstart_dict = {}

            # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and append
            # the default values to the dictionary
            for i in range(len(self.hp_space)):
                this_param = self.hp_space[i].name
                this_warmstart_value = warmstart_params[this_param]

                # For some HPs (e.g. max_depth of RF) the default value is None, although their
                # typical dtype is different (e.g. int)
                if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                    # Try to impute these values by the mean value
                    this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))

                # Add the warm start HP-value to the dictionary
                warmstart_dict[this_param] = this_warmstart_value

            # Enqueue a trial with the warm start HP-values
            warmstart_study.enqueue_trial(params=warmstart_dict)

            # Optimize to ensure that the warm start configuration is evaluated first
            # (e.g. for parallel processes)
            warmstart_study.optimize(func=self.objective, n_trials=1)

            # Set flag to indicate that a warmstart took place
            did_warmstart = True

        except:
            print('Warmstarting optuna failed!')

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

    # No warmstart requested
    else:
        # Set flag to indicate that NO warmstart took place
        did_warmstart = False

    # Create a new study or reload the warmstart study (if available and requested)
    study = optuna.create_study(sampler=this_optimizer, direction='minimize',
                                study_name=study_name, storage=study_storage,
                                load_if_exists=True)

    # If a warm start took place, reduce the number of remaining function evaluations to ensure
    # comparability (equal budgets)
    if did_warmstart:
        n_func_evals = self.n_func_evals - 1
    else:
        n_func_evals = self.n_func_evals

    # Start the optimization
    try:
        study.optimize(func=self.objective, n_trials=n_func_evals, n_jobs=self.n_workers)
        run_successful = True

    # Algorithm crashed
    except:
        # Add a warning here
        run_successful = False

    # If the optimization run was successful, determine the optimization results
    if run_successful:
        # Create a TuningResult-object to store the optimization results
        # Transformation of the results into a TuningResult-Object
        all_trials = study.get_trials()
        best_configuration = study.best_params
        best_val_loss = study.best_value

        start_times = []  # Start time of each trial
        finish_times = []  # Finish time of each trial
        # evaluation_ids = []  # Number the evaluations / iterations of this run
        unsorted_losses = []  # Loss of each iteration
        unsorted_configurations = ()  # HP-configuration of each iteration

        # Number the evaluations / iterations of this run
        evaluation_ids = list(range(1, len(all_trials) + 1))

        for i in range(len(all_trials)):
            start_times.append(all_trials[i].datetime_start)
            finish_times.append(all_trials[i].datetime_complete)
            # evaluation_ids.append(all_trials[i].number)
            unsorted_losses.append(all_trials[i].value)
            unsorted_configurations = unsorted_configurations + (all_trials[i].params,)

        abs_start_time = min(start_times)  # start time of the first trial

        unsorted_timestamps = []
        for i in range(len(start_times)):
            this_time = finish_times[i] - abs_start_time  # time difference to the start of the first trial
            this_timestamp = this_time.total_seconds()  # conversion into float value
            unsorted_timestamps.append(this_timestamp)

        wall_clock_time = max(unsorted_timestamps)

        ids = list(range(1, len(all_trials) + 1))

        temp_dict = {'ids': ids,
                     'timestamps [finished]': unsorted_timestamps,
                     'losses': unsorted_losses,
                     'configurations': unsorted_configurations}

        unsorted_df = pd.DataFrame.from_dict(data=temp_dict)
        unsorted_df.set_index('ids', inplace=True)

        # Sort DataFrame according to timestamps (necessary for multiprocessing)
        sorted_df = unsorted_df.sort_values(by=['timestamps [finished]'], ascending=True, inplace=False)

        timestamps = list(sorted_df['timestamps [finished]'])
        losses = list(sorted_df['losses'])
        configurations = tuple(sorted_df['configurations'])

        # Optuna uses full budgets for its HPO methods
        budget = [100.0] * len(losses)

        # Compute the loss on the test set for the best found configuration
        test_loss = self.train_evaluate_ml_model(params=best_configuration, cv_mode=False, test_mode=True)

    # Run not successful (algorithm crashed)
    else:
        evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, \
            wall_clock_time, test_loss, budget = self.impute_results_for_crash()

    # Pass the results to a TuningResult-object
    result = TuningResult(evaluation_ids=evaluation_ids, timestamps=timestamps, losses=losses,
                          configurations=configurations, best_val_loss=best_val_loss,
                          best_configuration=best_configuration, wall_clock_time=wall_clock_time,
                          test_loss=test_loss, successful=run_successful,
                          did_warmstart=did_warmstart, budget=budget)

    return result

import optuna  # pip install optuna

optuna.create_study()
optuna.load_study()
optuna.delete_study()
optuna.copy_study()
optuna.get_all_study_summaries()
optuna.TrialPruned

trial = Trial(study, trial_id)
trial.datetime_start
trial.distributions
trial.number
trial.params
trial.system_attrs
trial.user_attrs
trial.report(value, step)
trial.set_system_attr(key, value)
trial.set_user_attr(key, value)
trial.should_prune()
trial.suggest_categorical(name, choices)
trial.suggest_discrete_uniform(name, low, high, q)
trial.suggest_float(name, low, high, *, step=None, log=False)
trial.suggest_int(name, low, high, step=1, log=False)
trial.suggest_loguniform(name, low, high)
trial.suggest_uniform(name, low, high)

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
import tensorflow as tf
from tensorflow import keras

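# Minimal sketch tying the calls listed above together (not from the original cheat
# sheet): create a study, optimize a toy objective, list the study summaries, then
# delete the study. The storage path, study name, and objective are illustrative.
import optuna

def cheat_sheet_objective(trial):
    x = trial.suggest_float('x', -5.0, 5.0, step=0.1)
    n = trial.suggest_int('n', 1, 10)
    return x ** 2 + n

if __name__ == '__main__':
    storage = 'sqlite:///cheatsheet.db'  # assumed storage URL
    study = optuna.create_study(study_name='cheatsheet_demo', storage=storage,
                                direction='minimize', load_if_exists=True)
    study.optimize(cheat_sheet_objective, n_trials=10)
    print(study.best_params, study.best_value)
    print(optuna.get_all_study_summaries(storage=storage))
    optuna.delete_study(study_name='cheatsheet_demo', storage=storage)
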
psql_url = f"postgresql://{DBUSER}@trans-db-01/{DBNAME}?options=-c%20search_path={DBSCHEMA}" engine = create_engine(psql_url) if __name__ == "__main__": outcome_type = snakemake.wildcards["outcome_type"] model_type = snakemake.wildcards["model_type"] outcome_variable = snakemake.wildcards["outcome_variable"] study_name = f"{outcome_type}__{model_type}__{outcome_variable}" feature_subset = make_feature_subset(outcome_type) storage = optuna.storages.RDBStorage(url=psql_url, engine_kwargs={"pool_size": 0}) try: optuna.delete_study(storage=storage, study_name=study_name) except: pass study = optuna.create_study( storage=storage, study_name=study_name, direction="minimize", pruner=optuna.pruners.HyperbandPruner(min_resource=15, reduction_factor=3), sampler=optuna.samplers.TPESampler(multivariate=True, seed=42) ) study.set_user_attr("contributors", ["benkaa"]) n_jobs = snakemake.threads n_trials = snakemake.params.n_optuna_trials // n_jobs def optimize(n_trials):
def make_study():
    try:
        op.delete_study(storage=storage, study_name=name)
    except:
        pass
    return op.create_study(storage=storage, study_name=name)

# Check if the study record already exists.
try:
    optuna.load_study(
        study_name=study_name,
        storage=storage,
        # direction=direction,
        sampler=sampler)
except KeyError:
    # The study name was not in storage, so we can proceed.
    pass
except:
    if args_dict["override"]:
        message = f"Removing the study that exists in storage {storage}."
        optuna.delete_study(study_name=study_name, storage=storage)
    else:
        message = f"The study {study_name} already exists in storage and reload was False."
        message += f" Delete it from {storage}, and try again or rerun this script"
        message += " with the flag: --override 1"
        raise OSError(message)

# Create a new study in the storage object
if single_objective:
    create_study = optuna.create_study(study_name=study_name,
                                       storage=storage,
                                       direction=direction,
                                       sampler=sampler)
else:
    create_study = optuna.multi_objective.study.create_study(