Code Example #1
File: optimize.py  Project: not522/optuna
from optuna.samplers import BaseSampler, CmaEsSampler, RandomSampler, TPESampler


def create_sampler(sampler_mode: str) -> BaseSampler:
    if sampler_mode == "random":
        return RandomSampler()
    elif sampler_mode == "tpe":
        return TPESampler()
    elif sampler_mode == "cmaes":
        return CmaEsSampler()
    else:
        assert False
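For context, a minimal sketch of how such a factory could be plugged into a study. The two-parameter quadratic objective is purely an illustrative assumption, not part of the original file; two parameters are used because CmaEsSampler falls back to independent sampling for single-parameter search spaces.

import optuna


# Hypothetical objective used only to illustrate the factory above.
def objective(trial: optuna.Trial) -> float:
    x = trial.suggest_float("x", -10.0, 10.0)
    y = trial.suggest_float("y", -10.0, 10.0)
    return (x - 2.0) ** 2 + (y + 1.0) ** 2


study = optuna.create_study(sampler=create_sampler("cmaes"), direction="minimize")
study.optimize(objective, n_trials=50)
print(study.best_params)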
Code Example #2
import joblib
import optuna
from os import path
from optuna.samplers import CmaEsSampler


def optuna_search_evo():
    try:
        if path.exists("study_evo.pkl"):
            study = joblib.load('study_evo.pkl')
        else:
            study = optuna.create_study(direction='maximize', sampler=CmaEsSampler())

        study.optimize(objective_evo, n_trials=300)

    finally:
        joblib.dump(study, 'study_evo.pkl')
        print('Optuna search done...')
        print('Best trial:')
        print(study.best_trial)
        print('Best params:')
        print(study.best_params)
        print('Best value:')
        print(study.best_value)
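The function above resumes a pickled study if study_evo.pkl exists, but objective_evo itself is not shown. A minimal stand-in, assuming a simple two-parameter search space (the real objective is unknown):

# Hypothetical placeholder for objective_evo; the actual search space and
# model are not part of the original snippet.
def objective_evo(trial):
    x = trial.suggest_float('x', -5.0, 5.0)
    y = trial.suggest_float('y', -5.0, 5.0)
    return -(x ** 2 + y ** 2)  # direction='maximize', so the optimum is at (0, 0)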
Code Example #3
    def optimize(self) -> TuningResult:
        """
        Method performs a hyperparameter optimization run according to the selected HPO-method.
        :return: result: TuningResult
            TuningResult-object that contains the results of this optimization run.
        """

        # Select the specified HPO-tuning method
        if self.hpo_method == 'CMA-ES':
            this_optimizer = CmaEsSampler(seed=self.random_seed)

        elif self.hpo_method == 'TPE':
            this_optimizer = TPESampler(seed=self.random_seed)

        elif self.hpo_method == 'RandomSearch':
            this_optimizer = RandomSampler(seed=self.random_seed)

        else:
            raise ValueError('Unknown HPO-method!')

        # Create a study object and specify the optimization direction
        study_name = 'hpo_study'
        study_storage = 'sqlite:///hpo.db'

        # Optimize on the predefined n_func_evals and measure the wall clock times
        # start_time = time.time()
        self.times = []  # Initialize a list for saving the wall clock times

        # Delete old study objects ('fresh start') >> otherwise the old results will be included
        try:
            optuna.delete_study(study_name=study_name, storage=study_storage)

        except Exception:
            print('No old optuna study objects found!')

        # Use a warmstart configuration?
        if self.do_warmstart == 'Yes':

            try:
                # Create a new study
                warmstart_study = optuna.create_study(direction='minimize',
                                                      storage=study_storage,
                                                      study_name=study_name,
                                                      load_if_exists=False)

                # Retrieve the warmstart hyperparameters for the ML-algorithm
                warmstart_params = self.get_warmstart_configuration()

                # Initialize a dictionary for the warmstart HP-configuration
                warmstart_dict = {}

                # Iterate over all hyperparameters of this ML-algorithm's tuned HP-space and append the default values
                # to the dictionary
                for i in range(len(self.hp_space)):

                    this_param = self.hp_space[i].name
                    this_warmstart_value = warmstart_params[this_param]

                    # For some HPs (e.g. max_depth of RF) the default value is None, although their typical dtype is
                    # different (e.g. int)
                    if this_warmstart_value is None and type(self.hp_space[i]) == skopt.space.space.Integer:
                        # Try to impute these values by the mean value
                        this_warmstart_value = int(0.5 * (self.hp_space[i].low + self.hp_space[i].high))

                    # Add the warm start HP-value to the dictionary
                    warmstart_dict[this_param] = this_warmstart_value

                # Enqueue a trial with the warm start HP-values
                warmstart_study.enqueue_trial(params=warmstart_dict)

                # Optimize to ensure that the warm start configuration is evaluated first (e.g. for parallel processes)
                warmstart_study.optimize(func=self.objective, n_trials=1)

                # Set flag to indicate that a warmstart took place
                did_warmstart = True

            except Exception:
                print('Warmstarting optuna failed!')

                # Set flag to indicate that NO warmstart took place
                did_warmstart = False

        # No warmstart requested
        else:

            # Set flag to indicate that NO warmstart took place
            did_warmstart = False

        # Create a new study or reload the warmstart study (if available and requested)
        study = optuna.create_study(sampler=this_optimizer,
                                    direction='minimize',
                                    study_name=study_name,
                                    storage=study_storage,
                                    load_if_exists=True)

        # If a warm start took place, reduce the number of remaining function evaluations to ensure comparability
        # (equal budgets)
        if did_warmstart:
            n_func_evals = self.n_func_evals - 1
        else:
            n_func_evals = self.n_func_evals

        # Start the optimization
        try:

            study.optimize(func=self.objective,
                           n_trials=n_func_evals,
                           n_jobs=self.n_workers)
            run_successful = True

        # Algorithm crashed
        except Exception:
            # Add a warning here
            run_successful = False

        # If the optimization run was successful, determine the optimization results
        if run_successful:

            # Create a TuningResult-object to store the optimization results
            # Transformation of the results into a TuningResult-Object
            all_trials = study.get_trials()
            best_configuration = study.best_params
            best_val_loss = study.best_value

            start_times = []  # Start time of each trial
            finish_times = []  # Finish time of each trial
            # evaluation_ids = []  # Number the evaluations / iterations of this run
            unsorted_losses = []  # Loss of each iteration
            unsorted_configurations = ()  # HP-configuration of each iteration

            # Number the evaluations / iterations of this run
            evaluation_ids = list(range(1, len(all_trials) + 1))

            for i in range(len(all_trials)):
                start_times.append(all_trials[i].datetime_start)
                finish_times.append(all_trials[i].datetime_complete)

                # evaluation_ids.append(all_trials[i].number)
                unsorted_losses.append(all_trials[i].value)
                unsorted_configurations += (all_trials[i].params,)

            abs_start_time = min(start_times)  # start time of the first trial
            unsorted_timestamps = []
            for i in range(len(start_times)):
                # Time difference to the start of the first trial
                this_time = finish_times[i] - abs_start_time
                # Conversion into a float value (seconds)
                this_timestamp = this_time.total_seconds()
                unsorted_timestamps.append(this_timestamp)

            wall_clock_time = max(unsorted_timestamps)

            ids = list(range(1, len(all_trials) + 1))
            temp_dict = {
                'ids': ids,
                'timestamps [finished]': unsorted_timestamps,
                'losses': unsorted_losses,
                'configurations': unsorted_configurations,
            }

            unsorted_df = pd.DataFrame.from_dict(data=temp_dict)
            unsorted_df.set_index('ids', inplace=True)

            # Sort DataFrame according to timestamps (necessary for multiprocessing)
            sorted_df = unsorted_df.sort_values(by=['timestamps [finished]'],
                                                ascending=True,
                                                inplace=False)

            timestamps = list(sorted_df['timestamps [finished]'])
            losses = list(sorted_df['losses'])
            configurations = tuple(sorted_df['configurations'])

            # Optuna uses full budgets for its HPO methods
            budget = [100.0] * len(losses)

            # Compute the loss on the test set for the best found configuration
            test_loss = self.train_evaluate_ml_model(params=best_configuration,
                                                     cv_mode=False,
                                                     test_mode=True)

        # Run not successful (algorithm crashed)
        else:
            evaluation_ids, timestamps, losses, configurations, best_val_loss, best_configuration, wall_clock_time, \
            test_loss, budget = self.impute_results_for_crash()

        # Pass the results to a TuningResult-object
        result = TuningResult(evaluation_ids=evaluation_ids,
                              timestamps=timestamps,
                              losses=losses,
                              configurations=configurations,
                              best_val_loss=best_val_loss,
                              best_configuration=best_configuration,
                              wall_clock_time=wall_clock_time,
                              test_loss=test_loss,
                              successful=run_successful,
                              did_warmstart=did_warmstart,
                              budget=budget)

        return result
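The warm start logic above relies on Optuna's Study.enqueue_trial: a known configuration is queued so it is evaluated before any sampled trial. A stripped-down sketch of that pattern, with a hypothetical objective and placeholder parameter names:

import optuna
from optuna.samplers import CmaEsSampler


# Hypothetical objective; parameter names and ranges are placeholders.
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 10, 500)
    max_depth = trial.suggest_int('max_depth', 2, 32)
    return n_estimators / max_depth  # stand-in for a validation loss


study = optuna.create_study(direction='minimize', sampler=CmaEsSampler(seed=0))

# Queue the warm start configuration so it becomes the first evaluated trial
study.enqueue_trial({'n_estimators': 100, 'max_depth': 8})
study.optimize(objective, n_trials=20)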
Code Example #4
                    action='store_true')
parser.add_argument('--json',
                    help="Print best params in json",
                    action='store_true')

args = parser.parse_args()

seed = np.random.randint(2**32 - 1)
n_startup_trials = args.startup

if args.sampler == "TPE":
    sampler = TPESampler(n_startup_trials=n_startup_trials,
                         seed=seed,
                         multivariate=False)
elif args.sampler == "CMAES":
    sampler = CmaEsSampler(n_startup_trials=n_startup_trials, seed=seed)
else:
    raise ValueError("sampler not correctly specified")

if args.results_only:
    study = optuna.load_study(study_name=args.name,
                              storage=args.storage,
                              sampler=sampler)
else:
    study = optuna.create_study(study_name=args.name,
                                storage=args.storage,
                                direction='minimize',
                                sampler=sampler,
                                load_if_exists=True)
    study.set_user_attr("sampler", args.sampler)
    if args.robot == "wolfgang":
Code Example #5
    optimizer = keras.optimizers.RMSprop(learning_rate=lr)

    model.compile(optimizer=optimizer, loss='mse')

    model.fit(X_train, y_train, epochs=10, batch_size=128, validation_data=(X_val, y_val), verbose=1)
    y_pred = model.predict(X_val)

    val_loss = sqrt(mean_squared_error(y_val, y_pred))

    return val_loss

# 3. Create a study object and optimize the objective function
if SAMPLER == 'TPE':
    thisSampler = TPESampler()
elif SAMPLER == 'CMA-ES':
    thisSampler = CmaEsSampler()
else:
    thisSampler = RandomSampler()

if ALGORITHM == 'RandomForestRegressor':
    thisObjective = rf_objective
elif ALGORITHM == 'Keras':
    thisObjective = keras_objective
else:
    raise ValueError('ALGORITHM not correctly specified')

study = optuna.create_study(sampler=thisSampler, direction='minimize')
study.optimize(func=thisObjective, n_trials=100)

# >> test how to access the trials data
all_trials = study.get_trials()  # List of FrozenTrial objects
df_trials = study.trials_dataframe()  # Pandas DataFrame
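Both accessors expose the same per-trial information; for instance, the DataFrame can be sorted by objective value to inspect the best configurations (column names as produced by trials_dataframe()):

# 'value' holds each trial's objective value; 'params_*' columns hold its configuration
print(df_trials.sort_values('value').head())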