def make_study(restart=False):
    ''' Make a study, deleting any existing one first if restart=True '''
    try:
        if restart:
            print(f'About to delete {storage}:{name}, you have 5 seconds to intervene!')
            sc.timedsleep(5.0)
            op.delete_study(storage=storage, study_name=name)
    except Exception:
        pass  # the study may not exist yet, which is fine
    output = op.create_study(storage=storage, study_name=name, load_if_exists=not restart)
    return output
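A minimal usage sketch (not from the original source) showing the module-level names this helper appears to assume; `sc` is assumed to be sciris (for `sc.timedsleep`), and the storage URL and study name below are illustrative placeholders.

import optuna as op
import sciris as sc  # assumed: provides sc.timedsleep

storage = 'sqlite:///example.db'  # hypothetical storage URL
name = 'example_study'            # hypothetical study name

study = make_study(restart=True)  # deletes any existing copy, then recreates the study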
Example #2
def fix_broken_study(_study: optuna.study.Study, name: str, storage: str,
                     direction: str, sampler: optuna.samplers.BaseSampler):
    """
        This method removes broken trials, which are those 
        that failed to complete 1 epoch before slurm (or something else) killed the job
        and returned NAN or NONE.
        
        Failure to remove these trails leads to a error when optuna tries to update the 
        parameters. This is because these trails only have "NoneType" data associated 
        with them, but we need numerical data (e.g. the loss value) to update parameters.
    """

    if len(_study.trials) == 0:
        return _study, []

    trials = []
    removed = []
    for trial in _study.trials:
        if len(trial.intermediate_values) == 0:
            trials.append(trial)
            continue
        step, intermediate_value = max(trial.intermediate_values.items())  # last reported step and its value
        if intermediate_value is not None:
            trials.append(trial)
        else:
            removed.append(trial.number + 1)

    if len(removed) == 0:
        return _study, []

    # Delete the current study
    optuna.delete_study(study_name=name, storage=storage)

    # Create a new one in its place
    if isinstance(direction, str):
        study_fixed = optuna.create_study(study_name=name,
                                          storage=storage,
                                          direction=direction,
                                          sampler=sampler,
                                          load_if_exists=False)
    else:
        study_fixed = optuna.multi_objective.create_study(study_name=name,
                                                          storage=storage,
                                                          directions=direction,
                                                          sampler=sampler,
                                                          load_if_exists=False)

    # Add the working trials to the new study
    for trial in trials:
        study_fixed.add_trial(trial)

    return study_fixed, removed
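A hedged usage sketch (assumed, not part of the source): load an existing study and swap it for the repaired copy returned by fix_broken_study. The storage URL and study name are placeholders.

import optuna

storage = 'sqlite:///optuna.db'   # placeholder storage URL
name = 'my_study'                 # placeholder study name
sampler = optuna.samplers.TPESampler()

study = optuna.load_study(study_name=name, storage=storage)
study, removed = fix_broken_study(study, name, storage, direction='minimize', sampler=sampler)
print(f'Removed broken trial numbers: {removed}')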
Example #3
    def calcula_resultados(self) -> List[Resultado]:
        """
        Retorna, para cada fold, o seu respectivo resultado
        """
        self._resultados = []
        self.arr_validacao_por_fold = []  #experimentos de validacao por fold
        #seed para mater a reprodutibilidade dos experimentos
        np.random.seed(1)
        ## Para cada fold
        for i, fold in enumerate(self.folds):

            ## 1. If an optimization method is provided, use it to obtain the best method
            if self.ClasseObjetivoOtimizacao is not None:
                try:
                    if not self.load_if_exists:
                        optuna.delete_study(
                            study_name=f"{self.nom_experimento}_fold_{i}",
                            storage=f'sqlite:///resultados/optuna_studies.db')
                except KeyError:
                    pass
                study = optuna.create_study(
                    study_name=f"{self.nom_experimento}_fold_{i}",
                    sampler=self.sampler,
                    direction="maximize",
                    storage=f'sqlite:///resultados/optuna_studies.db',
                    load_if_exists=self.load_if_exists)
                objetivo_otimizacao = self.ClasseObjetivoOtimizacao(
                    fold, self.preproc_method)
                study.optimize(objetivo_otimizacao, self.num_trials)
                # get the best method found by the optimization
                best_method = objetivo_otimizacao.arr_evaluated_methods[
                    study.best_trial.number]
                self.studies_per_fold.append(study)
            else:
                # otherwise, the method stored as an attribute of the Experimento class (unmodified) is used
                best_method = self.ClasseObjetivoOtimizacao.ml_method_default

            ## 2. Append to the results the result predicted using the best method
            resultado = best_method.eval(self.preproc_method, fold.df_treino,
                                         fold.df_data_to_predict,
                                         fold.col_classe)
            print(resultado.macro_f1)
            self._resultados.append(resultado)
        return self._resultados
Example #4
def test_delete_study(storage_mode: str) -> None:

    with StorageSupplier(storage_mode) as storage:
        # Test deleting a non-existing study.
        with pytest.raises(KeyError):
            delete_study(study_name="invalid-study-name", storage=storage)

        # Test deleting an existing study.
        study = create_study(storage=storage, load_if_exists=False)
        delete_study(study_name=study.study_name, storage=storage)

        # Test that deleting an already-deleted study fails.
        with pytest.raises(KeyError):
            delete_study(study_name=study.study_name, storage=storage)
Example #5
def test_delete_study(storage_mode: str) -> None:

    with StorageSupplier(storage_mode) as storage:
        # Get storage object because delete_study does not accept None.
        storage = get_storage(storage=storage)
        assert storage is not None

        # Test deleting a non-existing study.
        with pytest.raises(KeyError):
            delete_study("invalid-study-name", storage)

        # Test deleting an existing study.
        study = create_study(storage=storage, load_if_exists=False)
        delete_study(study.study_name, storage)

        # Test that deleting an already-deleted study fails.
        with pytest.raises(KeyError):
            delete_study(study.study_name, storage)
Example #6
def test_delete_study(storage_mode, cache_mode):
    # type: (str, bool) -> None

    with StorageSupplier(storage_mode, cache_mode) as storage:
        # Get storage object because delete_study does not accept None.
        storage = optuna.storages.get_storage(storage=storage)
        assert storage is not None

        # Test deleting a non-existing study.
        with pytest.raises(ValueError):
            optuna.delete_study("invalid-study-name", storage)

        # Test deleting an existing study.
        study = optuna.create_study(storage=storage, load_if_exists=False)
        optuna.delete_study(study.study_name, storage)

        # Test that deleting an already-deleted study fails.
        if not isinstance(study._storage, optuna.storages.InMemoryStorage):
            # Skip `InMemoryStorage` because it just internally initializes trials and so on.
            with pytest.raises(ValueError):
                optuna.delete_study(study.study_name, storage)
Example #7
    def optimize(self) -> TuningResult:
        """
        Method performs a hyperparameter optimization run according to the selected HPO-method.
        :return: result: TuningResult
            TuningResult-object that contains the results of this optimization run.
        """

        # Select the specified HPO-tuning method
        if self.hpo_method == 'CMA-ES':
            this_optimizer = CmaEsSampler(seed=self.random_seed)

        elif self.hpo_method == 'TPE':
            this_optimizer = TPESampler(seed=self.random_seed)

        elif self.hpo_method == 'RandomSearch':
            this_optimizer = RandomSampler(seed=self.random_seed)

        else:
            raise ValueError(f'Unknown HPO method: {self.hpo_method}')

        # Create a study object and specify the optimization direction
        study_name = 'hpo_study'
        study_storage = 'sqlite:///hpo.db'

        # Optimize on the predefined n_func_evals and measure the wall clock times
        # start_time = time.time()
        self.times = []  # Initialize a list for saving the wall clock times

        # Delete old study objects ('fresh start') >> otherwise the old results will be included
        try:
            optuna.delete_study(study_name, study_storage)

        except Exception:
            print('No old optuna study objects found!')

        # Use a warmstart configuration?
        if self.do_warmstart == 'Yes':

            try:
                # Create a new study
                warmstart_study = optuna.create_study(direction='minimize',
                                                      storage=study_storage,
                                                      study_name=study_name,
                                                      load_if_exists=False)

                # Retrieve the warmstart hyperparameters for the ML-algorithm
                warmstart_params = self.get_warmstart_configuration()

                # Initialize a dictionary for the warmstart HP-configuration
                warmstart_dict = {}

                # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and append the default values
                # to the dictionary
                for i in range(len(self.hp_space)):

                    this_param = self.hp_space[i].name
                    this_warmstart_value = warmstart_params[this_param]

                    # For some HPs (e.g. max_depth of RF) the default value is None, although their typical dtype is
                    # different (e.g. int)
                    if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                        # Try to impute these values by the mean value
                        this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))

                    # Add the warm start HP-value to the dictionary
                    warmstart_dict[this_param] = this_warmstart_value

                # Enqueue a trial with the warm start HP-values
                warmstart_study.enqueue_trial(params=warmstart_dict)

                # Optimize to ensure that the warm start configuration is evaluated first (e.g. for parallel processes)
                warmstart_study.optimize(func=self.objective, n_trials=1)

                # Set flag to indicate that a warmstart took place
                did_warmstart = True

            except Exception:
                print('Warmstarting optuna failed!')

                # Set flag to indicate that NO warmstart took place
                did_warmstart = False

        # No warmstart requested
        else:

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

        # Create a new study or reload the warmstart study (if available and requested)
        study = optuna.create_study(sampler=this_optimizer,
                                    direction='minimize',
                                    study_name=study_name,
                                    storage=study_storage,
                                    load_if_exists=True)

        # If a warm start took place, reduce the number of remaining function evaluations to ensure comparability
        # (equal budgets)
        if did_warmstart:
            n_func_evals = self.n_func_evals - 1
        else:
            n_func_evals = self.n_func_evals

        # Start the optimization
        try:

            study.optimize(func=self.objective,
                           n_trials=n_func_evals,
                           n_jobs=self.n_workers)
            run_successful = True

        # Algorithm crashed
        except Exception:
            # Add a warning here
            run_successful = False

        # If the optimization run was successful, determine the optimization results
        if run_successful:

            # Create a TuningResult-object to store the optimization results
            # Transformation of the results into a TuningResult-Object
            all_trials = study.get_trials()
            best_configuration = study.best_params
            best_val_loss = study.best_value

            start_times = []  # Start time of each trial
            finish_times = []  # Finish time of each trial
            # evaluation_ids = []  # Number the evaluations / iterations of this run
            unsorted_losses = []  # Loss of each iteration
            unsorted_configurations = ()  # HP-configuration of each iteration

            # Number the evaluations / iterations of this run
            evaluation_ids = list(range(1, len(all_trials) + 1))

            for i in range(len(all_trials)):
                start_times.append(all_trials[i].datetime_start)
                finish_times.append(all_trials[i].datetime_complete)

                # evaluation_ids.append(all_trials[i].number)
                unsorted_losses.append(all_trials[i].value)
                unsorted_configurations = unsorted_configurations + (all_trials[i].params,)

            abs_start_time = min(start_times)  # start time of the first trial
            unsorted_timestamps = []
            for i in range(len(start_times)):
                this_time = finish_times[i] - abs_start_time  # time difference to the start of the first trial
                this_timestamp = this_time.total_seconds()  # conversion to a float value
                unsorted_timestamps.append(this_timestamp)

            wall_clock_time = max(unsorted_timestamps)

            ids = list(range(1, len(all_trials) + 1))
            temp_dict = {
                'ids': ids,
                'timestamps [finished]': unsorted_timestamps,
                'losses': unsorted_losses,
                'configurations': unsorted_configurations,
            }

            unsorted_df = pd.DataFrame.from_dict(data=temp_dict)
            unsorted_df.set_index('ids', inplace=True)

            # Sort DataFrame according to timestamps (necessary for multiprocessing)
            sorted_df = unsorted_df.sort_values(by=['timestamps [finished]'],
                                                ascending=True,
                                                inplace=False)

            timestamps = list(sorted_df['timestamps [finished]'])
            losses = list(sorted_df['losses'])
            configurations = tuple(sorted_df['configurations'])

            # Optuna uses full budgets for its HPO methods
            budget = [100.0] * len(losses)

            # Compute the loss on the test set for the best found configuration
            test_loss = self.train_evaluate_ml_model(params=best_configuration,
                                                     cv_mode=False,
                                                     test_mode=True)

        # Run not successful (algorithm crashed)
        else:
            evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

        # Pass the results to a TuningResult-object
        result = TuningResult(evaluation_ids=evaluation_ids,
                              timestamps=timestamps,
                              losses=losses,
                              configurations=configurations,
                              best_val_loss=best_val_loss,
                              best_configuration=best_configuration,
                              wall_clock_time=wall_clock_time,
                              test_loss=test_loss,
                              successful=run_successful,
                              did_warmstart=did_warmstart,
                              budget=budget)

        return result
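A standalone sketch (assumed names, not the author's class) of the warm-start trick used above: enqueue a known-good configuration so it is evaluated before the sampler takes over. The search space and target value are purely illustrative.

import optuna

def objective(trial):
    x = trial.suggest_float('x', 0.0, 10.0)  # illustrative search space
    return (x - 3.0) ** 2

study = optuna.create_study(direction='minimize')
study.enqueue_trial({'x': 3.0})        # the warm-start configuration is evaluated first
study.optimize(objective, n_trials=10)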
Example #8
import optuna # pip install optuna

optuna.create_study()
optuna.load_study()
optuna.delete_study()
optuna.copy_study()
optuna.get_all_study_summaries()
optuna.TrialPruned

trial = Trial(study, trial_id)
trial.datetime_start
trial.distributions
trial.number
trial.params
trial.system_attrs
trial.user_attrs

trial.report(value, step)
trial.set_system_attr(key, value)
trial.set_user_attr(key, value)
trial.should_prune()
trial.suggest_categorical(name, choices)
trial.suggest_discrete_uniform(name, low, high, q)
trial.suggest_float(name, low, high, *, ?step, ?log)
trial.suggest_int(name, low, high, ?step, ?log)
trial.suggest_loguniform(name, low, high)
trial.suggest_uniform(name, low, high)
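# A minimal sketch (not part of the original snippet) tying a few of the calls
# listed above together; the quadratic "loss" below is purely illustrative.
def objective(trial):
    x = trial.suggest_float('x', -10.0, 10.0)
    for step in range(10):
        value = (x - 2.0) ** 2        # stand-in for a real validation loss
        trial.report(value, step)     # report the intermediate value for pruning
        if trial.should_prune():
            raise optuna.TrialPruned()
    return value

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)
print(study.best_params)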

#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
import tensorflow as tf
from tensorflow import keras
from sqlalchemy import create_engine

psql_url = f"postgresql://{DBUSER}@trans-db-01/{DBNAME}?options=-c%20search_path={DBSCHEMA}"
engine = create_engine(psql_url)

if __name__ == "__main__":
	
	outcome_type = snakemake.wildcards["outcome_type"]
	model_type = snakemake.wildcards["model_type"]
	outcome_variable = snakemake.wildcards["outcome_variable"]
	study_name = f"{outcome_type}__{model_type}__{outcome_variable}"
	feature_subset = make_feature_subset(outcome_type)
	
	storage = optuna.storages.RDBStorage(url=psql_url, engine_kwargs={"pool_size": 0})
	
	try:
		optuna.delete_study(storage=storage, study_name=study_name)
	except Exception:
		pass

	study = optuna.create_study(
		storage=storage, study_name=study_name, direction="minimize",
		pruner=optuna.pruners.HyperbandPruner(min_resource=15, reduction_factor=3),
		sampler=optuna.samplers.TPESampler(multivariate=True, seed=42)
	)
	study.set_user_attr("contributors", ["benkaa"])

	n_jobs = snakemake.threads
	n_trials = snakemake.params.n_optuna_trials // n_jobs

	def optimize(n_trials):
		
def make_study():
    try:
        op.delete_study(storage=storage, study_name=name)
    except Exception:
        pass  # the study may not exist yet
    return op.create_study(storage=storage, study_name=name)
Example #11
        # Check if the study record already exists.
        try:
            optuna.load_study(
                study_name=study_name,
                storage=storage,
                #direction = direction,
                sampler=sampler)
        except KeyError:  # The study name was not in storage, can proceed
            pass

        except Exception:
            if args_dict["override"]:
                message = f"Removing the study that exists in storage {storage}."
                # delete_study only accepts study_name and storage
                optuna.delete_study(study_name=study_name, storage=storage)
            else:
                message = f"The study {study_name} already exists in storage and reload was False."
                message += f" Delete it from {storage}, and try again or rerun this script"
                message += f" with the flag: --override 1"
                raise OSError(message)

        # Create a new study in the storage object
        if single_objective:
            create_study = optuna.create_study(study_name=study_name,
                                               storage=storage,
                                               direction=direction,
                                               sampler=sampler)
        else:
            create_study = optuna.multi_objective.study.create_study(