예제 #1
0
def test_minimizer_api_dummy_minimize(verbose, call):
    """Check the result API of ``dummy_minimize`` on the branin benchmark.

    ``dummy_minimize`` gets its own test because it does not support all
    parameters of the other minimizers and does not fit any models.
    """
    budget = 7
    search_space = [(-5.0, 10.0), (0.0, 15.0)]
    result = dummy_minimize(branin, search_space, n_calls=budget,
                            random_state=1, verbose=verbose, callback=call)

    # No model is fitted, so the list of models has to stay empty.
    assert result.models == []
    check_minimizer_api(result, budget)
    check_minimizer_bounds(result, budget)

    # An objective that simply hands back its input is expected to be
    # rejected with a ValueError.
    with pytest.raises(ValueError):
        dummy_minimize(lambda x: x, [[-5, 10]])
def test_minimizer_api():
    """Check the result API of ``dummy_minimize`` and of every minimizer.

    Every combination of ``verbose`` and callback style (a single callable
    or a list of callables) is exercised.

    The checks are called directly rather than yielded: yield-based
    (nose-style) tests are not executed by pytest >= 4, so yielded checks
    would never run there. Calling them directly works under any runner.
    """
    call_single = lambda res: res.x
    call_list = [call_single, check_result_callable]

    # Loop-invariant settings for the model-based minimizers.
    n_calls = 7
    n_random_starts = 3
    n_models = n_calls - n_random_starts

    for verbose, call in product([True, False], [call_single, call_list]):
        # dummy_minimize is special as it does not support all parameters
        # and does not fit any models
        result = dummy_minimize(branin, [(-5.0, 10.0), (0.0, 15.0)],
                                n_calls=7,
                                random_state=1,
                                verbose=verbose,
                                callback=call)

        assert result.models is None
        check_minimizer_api(result)
        check_minimizer_bounds(result)
        assert_raise_message(ValueError, "return a scalar", dummy_minimize,
                             lambda x: x, [[-5, 10]])

        for minimizer in MINIMIZERS:
            result = minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)],
                               n_random_starts=n_random_starts,
                               n_calls=n_calls,
                               random_state=1,
                               verbose=verbose,
                               callback=call)

            # One model is expected per call made after the random starts.
            check_minimizer_api(result, n_models)
            check_minimizer_bounds(result)
            assert_raise_message(ValueError, "return a scalar", minimizer,
                                 lambda x: x, [[-5, 10]])
예제 #3
0
def test_dummy_categorical_integer():
    """A dimension listing a single value must be sampled as that value."""
    def constant_objective(params):
        return 0

    space = [[1]]
    outcome = dummy_minimize(constant_objective, space, n_calls=1,
                             random_state=1)
    first_point = outcome.x_iters[0]
    assert first_point[0] == space[0][0]
예제 #4
0
def _gen_trial_vals(dims, num_trials, random_seed):
    """Return the points visited by a random search over ``dims``.

    The objective is a constant, so only the sampled points (``x_iters`` of
    the skopt result) are of interest. ``num_trials`` is forwarded as
    ``n_calls`` and ``random_seed`` as ``random_state``.
    """
    from skopt import dummy_minimize

    def _constant(*_args):
        return 0

    sampled = dummy_minimize(_constant, dims,
                             n_calls=num_trials,
                             random_state=random_seed)
    return sampled.x_iters
예제 #5
0
    def __optimize_n_clusters__(self):
        """Find the cluster count with the lowest ``__cluster_metric__`` score.

        The search strategy is chosen by ``self.method``; ranges narrower
        than 30 values are always scanned exhaustively.

        Returns
        -------
        tuple
            ``(score, n_clusters, extra)``: the lowest score, the cluster
            count that produced it and, as ``extra``, the width of the range
            (exhaustive scan) or ``res.x_iters`` (skopt strategies).
            ``None`` is returned implicitly when the range has 30 or more
            values and ``self.method`` matches none of the handled names.
        """
        span = self.cluster_max - self.cluster_min
        ntests = int(0.5 * span)

        if self.method == "exhaustive" or span < 30:
            # No-Optimize Full Test
            # The metric is always handed a one-element list, the same shape
            # the skopt minimizers below use when calling the objective.
            best = self.cluster_min
            result = self.__cluster_metric__([best])
            # cluster_min was just scored, so the scan starts one past it.
            for k_val in range(self.cluster_min + 1, self.cluster_max):
                run_result = self.__cluster_metric__([k_val])
                if run_result < result:
                    result = run_result
                    best = k_val
            return result, best, span

        elif self.method == "gprocess":
            # Gaussian-process based optimization.
            res = gp_minimize(self.__cluster_metric__, [(self.cluster_min, self.cluster_max)], n_calls=ntests)
            # res.fun is the best score, res.func_vals holds all tested scores
            return res.fun, res.x[0], res.x_iters

        elif self.method == "dtree":
            # Tree-based optimization (random forest base estimator).
            res = forest_minimize(self.__cluster_metric__, [(self.cluster_min, self.cluster_max)], base_estimator='RF', n_calls=ntests)
            return res.fun, res.x[0], res.x_iters

        elif self.method == "dummy":
            # Random search.
            res = dummy_minimize(self.__cluster_metric__, [(self.cluster_min, self.cluster_max)], n_calls=ntests)
            return res.fun, res.x[0], res.x_iters
예제 #6
0
def main(argv):
    """Run the hyper-parameter search described by the YAML file in ``FLAGS.yaml``.

    Unless ``FLAGS.force`` is set, ``check_commit()`` is called first. The
    log directory is created, the YAML file is parsed, a ``Job`` is built as
    the objective and the search is dispatched to ``gp_minimize``
    ("bayesian") or ``dummy_minimize`` ("random"). When
    ``optimizer["maximize"]`` is truthy the objective is negated.

    ``argv`` is not used by the body.

    Raises
    ------
    ValueError
        If ``optimizer["type"]`` is neither "bayesian" nor "random".
    """
    if not FLAGS.force:
        check_commit()

    os.makedirs(FLAGS.log, exist_ok=True)
    path_history = os.path.join(FLAGS.log, HISTORY_SHEET_NAME)

    (name_templ, train_templ, eval_templ,
     params, spaces, preprocess, optimizer) = parse_yaml(FLAGS.yaml)

    if FLAGS.verbose:
        for param, space in zip(params, spaces):
            print("{}: {}".format(param, space))

    job = Job(
        name_templ, train_templ, eval_templ, params, preprocess,
        upload=not FLAGS['dry-run'].value,
        verbose=FLAGS.verbose,
        debug=FLAGS.debug)
    callbacks = [LogCallback(params=params, path_out=path_history)]

    # The minimizers always minimize, so a maximization target is negated.
    if optimizer["maximize"]:
        def func(x):
            return -1 * job(x)
    else:
        func = job

    if optimizer["type"] == "bayesian":
        minimize = gp_minimize
    elif optimizer["type"] == "random":
        minimize = dummy_minimize
    else:
        raise ValueError("minimizer {} is invalid".format(optimizer["type"]))

    res = minimize(func, spaces, callback=callbacks, **optimizer["config"])
예제 #7
0
def test_minimizer_api():
    """Check the result API of ``dummy_minimize`` and of every minimizer.

    Every combination of ``verbose`` and callback style (a single callable
    or a list of callables) is exercised.

    The checks are called directly rather than yielded: yield-based
    (nose-style) tests are not executed by pytest >= 4, so yielded checks
    would never run there. Calling them directly works under any runner.
    """
    call_single = lambda res: res.x
    call_list = [call_single, check_result_callable]

    # Loop-invariant settings for the model-based minimizers.
    n_calls = 7
    n_random_starts = 3
    n_models = n_calls - n_random_starts

    for verbose, call in product([True, False], [call_single, call_list]):
        # dummy_minimize is special as it does not support all parameters
        # and does not fit any models
        result = dummy_minimize(branin, [(-5.0, 10.0), (0.0, 15.0)],
                                n_calls=7, random_state=1,
                                verbose=verbose, callback=call)

        assert result.models is None
        check_minimizer_api(result)
        check_minimizer_bounds(result)
        assert_raise_message(ValueError,
                             "return a scalar",
                             dummy_minimize, lambda x: x, [[-5, 10]])

        for minimizer in MINIMIZERS:
            result = minimizer(branin, [(-5.0, 10.0), (0.0, 15.0)],
                               n_random_starts=n_random_starts,
                               n_calls=n_calls,
                               random_state=1,
                               verbose=verbose, callback=call)

            # One model is expected per call made after the random starts.
            check_minimizer_api(result, n_models)
            check_minimizer_bounds(result)
            assert_raise_message(ValueError,
                                 "return a scalar",
                                 minimizer, lambda x: x, [[-5, 10]])
예제 #8
0
 def _random_trial(state):
     """Draw one random point from ``state.flag_dims`` and return its flags.

     The random state carried by the skopt result is stored back on
     ``state`` so that the next call continues the same random stream.
     """
     from skopt import dummy_minimize

     def _constant(*_args):
         return 0

     sample = dummy_minimize(_constant, state.flag_dims, n_calls=1,
                             random_state=state.random_state)
     state.random_state = sample.random_state
     return trial_flags(state.flag_names, sample.x_iters[-1])
예제 #9
0
 def _random_trial(state):
     """Sample one random point from ``state.dims`` and return the next flags.

     The skopt result is handed to ``state.update`` before the flags for
     the next trial are requested from ``state``.
     """
     from skopt import dummy_minimize

     def _constant(*_args):
         return 0

     sample = dummy_minimize(_constant, state.dims, n_calls=1,
                             random_state=state.random_state)
     state.update(sample)
     return state.next_trial_flags()
예제 #10
0
def run(args):
    """Run a hyper-parameter search and post-process its results.

    ``args.mode`` selects the optimizer: ``'gp'`` (``skopt.gp_minimize``),
    ``'random'`` (``skopt.dummy_minimize``) or ``'tree'``
    (``skopt.forest_minimize``). Afterwards the best model is optionally
    evaluated on the test set and the search results are plotted.

    Raises
    ------
    ValueError
        If ``args.mode`` is not one of the supported modes. Previously such
        a mode ran the whole setup and then failed with an
        ``UnboundLocalError`` when the missing results were first used.
    """
    supported_modes = ('gp', 'random', 'tree')
    if args.mode not in supported_modes:
        raise ValueError('Unsupported mode {!r}; expected one of {}'.format(
            args.mode, ', '.join(supported_modes)))

    # Create base serialization dir
    if not os.path.exists(args.serialization_dir):
        os.makedirs(args.serialization_dir)

    # Read in search configuration and create the blackbox function to optimize
    f, dimensions, x0, trial_paths, delete_worse_files_cb = setup(args)
    # Without initial points at least one random start is requested.
    n_random_starts = max(1, args.n_random_starts) if x0 is None else args.n_random_starts
    callback = None if args.no_delete_worse else delete_worse_files_cb

    # Keyword arguments shared by all three optimizers.
    common = dict(
        x0=x0,
        n_calls=args.n_calls,
        random_state=args.random_seed,
        verbose=True,
        callback=callback,
    )

    # Run the actual optimization
    if args.mode == 'gp':
        results = skopt.gp_minimize(
            f, dimensions,
            n_random_starts=n_random_starts,
            acq_optimizer='sampling',
            xi=args.xi,
            kappa=args.kappa,
            **common
        )
    elif args.mode == 'random':
        results = skopt.dummy_minimize(f, dimensions, **common)
    else:  # 'tree'
        results = skopt.forest_minimize(
            f, dimensions,
            n_random_starts=n_random_starts,
            xi=args.xi,
            kappa=args.kappa,
            **common
        )

    # Maybe evaluate the best model on the test dataset
    if args.evaluate_on_test:
        logger.info('EVALUATE ON TEST')
        evaluate_on_test(args, results, trial_paths)

    # Save a bunch of visualizations of the search process
    logger.info('PLOTTING RESULTS')
    plot_results(args.serialization_dir, results)

    logger.info('ALL DONE')
예제 #11
0
def test_api():
    """Check shapes and bounds of a ``dummy_minimize`` result on branin."""
    n_iter = 100
    res = dummy_minimize(branin, [(-5.0, 10.0), (0.0, 15.0)],
                         random_state=0, maxiter=n_iter)

    # One best point, plus one row / value per iteration.
    assert_array_equal(res.x.shape, (2,))
    assert_array_equal(res.x_iters.shape, (n_iter, 2))
    assert_array_equal(res.func_vals.shape, (n_iter,))

    # Every visited point lies strictly inside the search box.
    upper = np.tile([10, 15], (n_iter, 1))
    lower = np.tile([-5, 0], (n_iter, 1))
    assert_array_less(res.x_iters, upper)
    assert_array_less(lower, res.x_iters)

    assert_raises(ValueError, dummy_minimize, lambda x: x, [[-5, 10]])
예제 #12
0
def _trials_for_dims(names, dims, initial_x, num_trials, random_seed):
    """Return ``num_trials`` random trials as dicts keyed by ``names``.

    The points are sampled by skopt's random search over ``dims`` with a
    constant objective. When at least one point was sampled, the first one
    is passed to ``_apply_initial_x`` together with ``initial_x``. Each
    point is passed through ``_native_python_xs`` before being zipped with
    ``names``.
    """
    def _constant(*_args):
        return 0

    sampled = skopt.dummy_minimize(_constant, dims,
                                   n_calls=num_trials,
                                   random_state=random_seed).x_iters
    if sampled:
        _apply_initial_x(initial_x, sampled[0])

    trials = []
    for xs in sampled:
        trials.append(dict(zip(names, _native_python_xs(xs))))
    return trials
예제 #13
0
    def run(self):
        """Random-search ``optimization_function`` and store the best result.

        One ``(0, 1)`` dimension is used per entry of ``self._action_space``.
        The best point and its objective value are written to
        ``self.results`` under ``'optimal_decision'`` and
        ``'objective_value'``.
        """
        unit_interval = (0, 1)
        search_space = [unit_interval] * self._action_space

        outcome = dummy_minimize(self.optimization_function, search_space,
                                 random_state=1, n_calls=1000, verbose=True)

        self.results['optimal_decision'] = outcome.x
        self.results['objective_value'] = outcome.fun
예제 #14
0
def test_api():
    """Smoke-test the result of ``dummy_minimize`` on the branin benchmark."""
    maxiter = 100
    box = [[-5, 10], [0, 15]]
    res = dummy_minimize(branin, box, random_state=0, maxiter=maxiter)

    # Shapes: a single best point and one entry per iteration.
    assert_array_equal(res.x.shape, (2, ))
    assert_array_equal(res.x_iters.shape, (maxiter, 2))
    assert_array_equal(res.func_vals.shape, (maxiter, ))

    # All visited points stay strictly within the bounds.
    highs = np.tile([10, 15], (maxiter, 1))
    lows = np.tile([-5, 0], (maxiter, 1))
    assert_array_less(res.x_iters, highs)
    assert_array_less(lows, res.x_iters)

    assert_raises(ValueError, dummy_minimize, lambda x: x, [[-5, 10]])
예제 #15
0
def test_minimizer_api_dummy_minimize(verbose, call):
    """Check the result API of ``dummy_minimize`` on the branin benchmark.

    ``dummy_minimize`` is tested separately because it does not support all
    parameters of the other minimizers and does not fit any models.
    """
    budget = 7
    search_space = [(-5.0, 10.0), (0.0, 15.0)]
    result = dummy_minimize(branin, search_space, n_calls=budget,
                            random_state=1, verbose=verbose, callback=call)

    # No model is fitted, so the list of models has to stay empty.
    assert result.models == []
    check_minimizer_api(result, budget)
    check_minimizer_bounds(result, budget)

    # A non-scalar objective must be rejected with the expected message.
    assert_raise_message(ValueError, "return a scalar",
                         dummy_minimize, lambda x: x, [[-5, 10]])
예제 #16
0
def dummy(configurations, black_box_function, logger, verbose=False):
    """Random-search a single integer dimension and return the best point.

    The dimension ranges from 1 to ``configurations["thread_limit"]`` and
    ``configurations["random"]["num_of_exp"]`` evaluations are made.
    ``logger`` is accepted but not used by the body.

    Returns
    -------
    The ``x`` attribute of the ``dummy_minimize`` result.
    """
    thread_range = Integer(1, configurations["thread_limit"])
    n_experiments = configurations["random"]["num_of_exp"]

    outcome = dummy_minimize(func=black_box_function,
                             dimensions=[thread_range],
                             n_calls=n_experiments,
                             random_state=None,
                             x0=None,
                             y0=None,
                             verbose=verbose)
    return outcome.x
예제 #17
0
 def getBaseline(self):
     """Compute (or resume) ``self.niter`` random-search baselines.

     For every iteration a baseline file is looked up under
     ``<dir>/experiments/<id>/baseline_<n>.txt``:

     * file exists and has fewer than ``self.ncalls`` points: the file is
       opened for appending and the search is resumed from the stored
       points;
     * file exists and is complete: the stored data is wrapped in a
       ``ResultData`` without running anything;
     * no file: a fresh file is opened and a full search is run.

     Each result is appended to ``self.baseline``. The save file is closed
     in a ``finally`` clause so that it is also released when the search
     raises.
     """
     self.faketime = 0
     for i in range(self.niter):
         filename = self.dir + '/experiments/' + str(self.id) + '/baseline_'+ str(i+1) +'.txt'
         try:
             if os.path.isfile(filename):
                 x_iters, y_vals, time_vals = self.load_baseline(self.id, i)
                 remaining = self.ncalls - len(y_vals)
                 if remaining > 0:
                     self.savefile = open(filename, mode='a', newline='')
                     self.startime = time()
                     # Continue the clock from the last stored time value.
                     self.faketime = time_vals[-1]
                     result = dummy_minimize(self.obj, self.searchSpace, verbose=True, n_calls=remaining,
                                             x0=x_iters, y0=y_vals, callback=[self.savePoint, self.earlyStopping])
                 else:
                     result = ResultData(x_iters, y_vals, time_vals)
                 self.ignorePoint = True
             else:
                 self.savefile = open(filename, mode='w', newline='')
                 self.startime = time()
                 # NOTE(review): unlike the resumed run, no earlyStopping
                 # callback is passed here - confirm this is intended.
                 result = dummy_minimize(self.obj, self.searchSpace, verbose=True, n_calls=self.ncalls,
                                         callback=[self.savePoint])
             self.baseline.append(result)
         finally:
             # Closing an already closed file is a no-op, so a handle left
             # over from an earlier iteration is harmless here.
             savefile = getattr(self, 'savefile', None)
             if savefile is not None:
                 savefile.close()
예제 #18
0
def main():
    """Run a one-call random search on ``stybtang`` and dump the result.

    The result is written to ``hyperband_stybtang.pkl`` inside the directory
    given by the ``--results_dir`` command line option.
    """
    parser = argparse.ArgumentParser(description='Setup experiment.')
    parser.add_argument('--results_dir', type=str, help='Path to results directory.')
    args = parser.parse_args()

    # Five dimensions, each bounded by (-5, 5).
    low_high = (-5., 5.)
    bounds = np.tile(low_high, (5, 1))

    results = dummy_minimize(stybtang, bounds,
                             verbose=True, n_calls=1, random_state=0)

    dump(results, os.path.join(args.results_dir, 'hyperband_stybtang.pkl'))
예제 #19
0
def test_minimizer_api_dummy_minimize(verbose, call):
    """Check the result API of ``dummy_minimize`` on the branin benchmark.

    ``dummy_minimize`` is tested separately because it does not support all
    parameters of the other minimizers and does not fit any models.
    """
    budget = 7
    search_space = [(-5.0, 10.0), (0.0, 15.0)]
    result = dummy_minimize(branin, search_space, n_calls=budget,
                            random_state=1, verbose=verbose, callback=call)

    # No model is fitted, so ``models`` is expected to be None.
    assert result.models is None
    check_minimizer_api(result, budget)
    check_minimizer_bounds(result, budget)

    # A non-scalar objective must be rejected with the expected message.
    assert_raise_message(ValueError, "return a scalar",
                         dummy_minimize, lambda x: x, [[-5, 10]])
예제 #20
0
def test_checkpoint_saver():
    """``CheckpointSaver`` must write a loadable checkpoint with the final ``x``.

    The checkpoint file is removed in a ``finally`` clause so that a failing
    assertion (or an error in the optimizer) does not leave it on disk.
    """
    checkpoint_path = "./test_checkpoint.pkl"

    # Start from a clean slate in case an earlier run left a file behind.
    if os.path.isfile(checkpoint_path):
        os.remove(checkpoint_path)

    try:
        checkpoint_saver = CheckpointSaver(checkpoint_path, compress=9)
        result = dummy_minimize(bench1, [(-1.0, 1.0)],
                                callback=checkpoint_saver,
                                n_calls=10)

        assert os.path.exists(checkpoint_path)
        assert load(checkpoint_path).x == result.x
    finally:
        if os.path.isfile(checkpoint_path):
            os.remove(checkpoint_path)
예제 #21
0
def test_checkpoint_saver():
    """``CheckpointSaver`` must write a loadable checkpoint with the final ``x``.

    The checkpoint file is removed in a ``finally`` clause so that a failing
    assertion (or an error in the optimizer) does not leave it on disk.
    """
    checkpoint_path = "./test_checkpoint.pkl"

    # Start from a clean slate in case an earlier run left a file behind.
    if os.path.isfile(checkpoint_path):
        os.remove(checkpoint_path)

    try:
        checkpoint_saver = CheckpointSaver(checkpoint_path, compress=9)
        result = dummy_minimize(bench1,
            [(-1.0, 1.0)],
            callback=checkpoint_saver,
            n_calls=10)

        assert os.path.exists(checkpoint_path)
        assert load(checkpoint_path).x == result.x
    finally:
        if os.path.isfile(checkpoint_path):
            os.remove(checkpoint_path)
예제 #22
0
                def random_forest_on(rmspe_rf, mape_rf):
                    """Tune, fit and score a random forest regressor.

                    Hyper-parameters (max depth, min samples per split and
                    per leaf) are chosen by random search on the inner
                    cross-validation MSE. The tuned forest is refit on the
                    training data and scored on the test data; RMSPE and
                    MAPE are appended to the two argument lists, RMSE and
                    MAE to ``squared_rf`` and ``absolut_rf`` from the
                    enclosing scope.

                    Returns the tuple ``(rmspe_rf, mape_rf)``.
                    """
                    def cv_mse(params):
                        # Only the first three entries of the point are used.
                        forest = RandomForestRegressor(
                            n_estimators=1500,
                            max_depth=params[0],
                            min_samples_split=params[1],
                            min_samples_leaf=params[2])
                        fold_scores = cross_val_score(
                            forest,
                            X_train,
                            y_train['point_estimate'],
                            cv=cv_inner,
                            scoring="neg_mean_squared_error")
                        # The scorer is negated MSE; flip the sign so that
                        # lower is better.
                        return -np.mean(fold_scores)

                    search = dummy_minimize(cv_mse,
                                            space_xgb,
                                            n_calls=n_calls_hyp,
                                            verbose=1)
                    best = search.x
                    tuned_forest = RandomForestRegressor(
                        n_estimators=1500,
                        max_depth=best[0],
                        min_samples_split=best[1],
                        min_samples_leaf=best[2])

                    fitted = tuned_forest.fit(X_train,
                                              y_train['point_estimate'])
                    rand_pred = fitted.predict(X_test)

                    actual = y_test[dic.get(i)]
                    residual = actual - rand_pred
                    rmspe_rf.append((np.mean((residual / actual)**2))**0.5)
                    mape_rf.append(np.mean(np.abs(residual) / actual))
                    squared_rf.append(
                        mean_squared_error(actual, rand_pred)**0.5)
                    absolut_rf.append(mean_absolute_error(actual, rand_pred))
                    return (rmspe_rf, mape_rf)
예제 #23
0
def check_minimize(func, y_opt, dimensions, margin, n_calls):
    """Assert that random search gets below ``y_opt + margin`` on ``func``."""
    threshold = y_opt + margin
    outcome = dummy_minimize(func, dimensions,
                             n_calls=n_calls, random_state=1)
    assert_less(outcome.fun, threshold)
예제 #24
0
def test_timer_callback():
    """``TimerCallback`` must record one timing per call, with a positive sum."""
    n_calls = 10
    timer = TimerCallback()
    dummy_minimize(bench1, [(-1.0, 1.0)], callback=timer, n_calls=n_calls)

    timings = timer.iter_time
    assert_equal(len(timings), n_calls)
    assert_less(0.0, sum(timings))
예제 #25
0
def check_minimize(func, y_opt, dimensions, margin, n_calls):
    """Run a seeded random search and assert its best value is near ``y_opt``.

    The best objective value found must be strictly less than
    ``y_opt + margin``.
    """
    best_value = dummy_minimize(
        func, dimensions, n_calls=n_calls, random_state=1
    ).fun
    assert_less(best_value, y_opt + margin)
예제 #26
0
    def tune(self, params, evals=10, init_config=None, seed=None):
        """
        Run the hyperparameter search, then retrain and evaluate on the test set.

        The search uses a Gaussian Process surrogate (``gp_minimize``) when
        ``self.method == 'bayesian'`` and random search (``dummy_minimize``)
        otherwise. A checkpoint is saved through a ``CheckpointSaver``
        callback; if a checkpoint file already exists the search is resumed
        from it. A summary is appended to ``results.txt`` and the test-set
        results are printed and written to ``result_test.txt``. A
        ``telegram-send`` notification is issued at the start and at the end.

        Parameters
        ----------
        params: list
            List of skopt.space.space.Dimensions to be searched.
            NOTE: this list is modified in place (extra dimensions are
            appended for some recommender classes and ``num_factors`` may
            be replaced).

        evals: int
            Number of evaluations to perform. When resuming, the number of
            evaluations already stored in the checkpoint is subtracted.

        init_config: list, default None
            An initial parameter configuration for seeding the Gaussian Process
            NOTE(review): not referenced anywhere in the body of this method.

        seed: int, default None
            Seed for random_state of `gp_minimize` or `dummy_minimize`.
            Set to a fixed integer for reproducibility. Falls back to
            ``self.seed`` when ``None`` and ``params`` is non-empty.
        """

        msg = 'Started ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
        subprocess.run(['telegram-send', msg])

        # Shape of the URM_test CSR matrix
        U, I = self.URM_test.shape

        # Model-specific size dimensions: between 4 and 75% of I, capped at 1024.
        if self.recommender_class == GANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='emb_dim', dtype=int))
            self.fit_param_names.append('emb_dim')

        if self.recommender_class == CFGAN or self.recommender_class == DeepGANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='d_nodes', dtype=int))
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='g_nodes', dtype=int))
            self.fit_param_names.append('d_nodes')
            self.fit_param_names.append('g_nodes')

        if self.recommender_class == DisGANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='d_nodes', dtype=int))
            self.fit_param_names.append('d_nodes')

        self.dimension_names = [p.name for p in params]

        # NOTE(review): the string below mentions max(U, I), but the code
        # that follows caps `num_factors` at min(U, I).
        '''
        Need to make sure that the max. value of `num_factors` parameters must be lower than
        the max(U, I)
        '''
        try:
            idx = self.dimension_names.index('num_factors')
            maxval = params[idx].bounds[1]
            if maxval > min(U, I):
                params[idx] = Integer(1, min(U, I), name='num_factors', dtype=int)
        except ValueError:
            # No `num_factors` dimension in the search space: nothing to cap.
            pass

        if len(params) > 0:

            # Check if there is already a checkpoint for this experiment
            checkpoint_path = os.path.join(self.logsdir, 'checkpoint.pkl')
            checkpoint_exists = True if os.path.exists(checkpoint_path) else False
            checkpoint_saver = CheckpointSaver(os.path.join(self.logsdir, 'checkpoint.pkl'), compress=3)

            if seed is None:
                seed = self.seed

            t_start = int(time.time())

            if checkpoint_exists:
                # Resume: feed the stored points back in and only run the
                # evaluations that are still missing.
                # NOTE(review): nothing here guards against
                # `evals - len(previous_run.func_vals)` being <= 0.
                previous_run = skopt.load(checkpoint_path)
                if self.method == 'bayesian':
                    results = gp_minimize(self.obj_func, params, n_calls=evals - len(previous_run.func_vals),
                                          x0=previous_run.x_iters, y0=previous_run.func_vals, n_random_starts=0,
                                          random_state=seed, verbose=True, callback=[checkpoint_saver])
                else:
                    results = dummy_minimize(self.obj_func, params, n_calls=evals - len(previous_run.func_vals),
                                             x0=previous_run.x_iters, y0=previous_run.func_vals, random_state=seed,
                                             verbose=True, callback=[checkpoint_saver])
            else:
                # Hyperparameter optimization from scratch
                if self.method == 'bayesian':
                    results = gp_minimize(self.obj_func, params, n_calls=evals, random_state=seed, verbose=True,
                                          callback=[checkpoint_saver])
                else:
                    results = dummy_minimize(self.obj_func, params, n_calls=evals, random_state=seed, verbose=True,
                                          callback=[checkpoint_saver])

            t_end = int(time.time())

        # Save best parameters of this experiment
        # best_params = dict(zip(self.dimension_names, results.x))
        # with open(os.path.join(self.logsdir, 'best_params.pkl'), 'wb') as f:
        #     pickle.dump(best_params, f, pickle.HIGHEST_PROTOCOL)

            # The best parameters are read back through load_best_params()
            # rather than taken from `results.x`.
            best_params = self.load_best_params()

            with open(os.path.join(self.logsdir, 'results.txt'), 'a') as f:
                f.write('Experiment ran for {}\n'.format(str(datetime.timedelta(seconds=t_end - t_start))))
                f.write('Best {} score: {}. Best result found at: {}\n'.format(self.metric, results.fun, best_params))

            if self.recommender_class in [IALSRecommender, MatrixFactorization_BPR_Cython]:
                self.dimension_names.append('epochs')
            self.build_fit_params(best_params.values())

        # Retrain with all training data
        # NOTE: when `params` is empty the block above is skipped, so `seed`
        # is used here exactly as it was passed in.
        set_seed(seed)
        if self.isGAN:
            # NOTE(review): `train_mode` is not defined in this method;
            # presumably a module-level name - confirm.
            model = self.recommender_class(self.URM_train, mode=train_mode, is_experiment=True)
            model.logsdir = self.logsdir
            model.fit(**self.fit_params)
            # load_models(model, save_dir='best_model', all_in_folder=True)

        else:
            model = self.recommender_class(self.URM_train)
            model.fit(**self.fit_params)
            # model.loadModel(os.path.join(self.logsdir, 'best_model'))

        _, results_run_string = self.evaluatorTest.evaluateRecommender(model)

        print('\n\nResults on test set:')
        print(results_run_string)
        print('\n\n')

        with open(os.path.join(self.logsdir, 'result_test.txt'), 'w') as f:
            f.write(results_run_string)

        msg = 'Finished ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
        subprocess.run(['telegram-send', msg])
예제 #27
0
def compare_optimizers(num_instances=4, graph_size=15, n_calls=8, n_random_starts=2):
    """Compare four skopt minimizers on randomly created ``Graph`` instances.

    For every instance each minimizer (random sampling, forest, gradient
    boosted trees, Gaussian process) is run with ``n_calls`` evaluations on
    the negated expectation. The ratio of the instance's ``currentScore``
    to its optimal score is recorded, and the mean and standard deviation
    of those ratios are printed per minimizer.

    The module-level ``pbar`` is (re)bound to a fresh ``tqdm`` progress bar.
    """
    global pbar
    pbar = None

    # For progress bar.
    pbar = tqdm(total=num_instances*n_calls*4)

    instances = [Graph(graph_size) for _ in range(num_instances)]

    # One row per algorithm: (report format, minimizer, extra keyword
    # arguments, list collecting the fraction of the optimal score achieved).
    model_kwargs = {"n_random_starts": n_random_starts}
    contenders = [
        ("Random Sampling:\nMean: %s\nStd. Dev: %s",
         dummy_minimize, {}, []),
        ("Decision Trees:\nMean: %s\nStd. Dev: %s",
         forest_minimize, model_kwargs, []),
        ("Gradient Boosted Decision Trees:\nMean: %s\nStd. Dev: %s",
         gbrt_minimize, model_kwargs, []),
        ("Baynesian Optimization:\nMean: %s\nStd. Dev: %s",
         gp_minimize, model_kwargs, []),
    ]

    # For each instance, run each algorithm.
    for inst in instances:
        # Scikit functions only take in parameters and want to minimize
        # values, so wrap get_expectation and flip its sign.
        def sk_get_exp(x):
            return -1*get_expectation(x, inst)

        opt = inst.optimal_score()[0]

        for _, minimize, extra_kwargs, ratios in contenders:
            inst.clear_runs()
            minimize(func=sk_get_exp,
                     dimensions=[(0, 2*np.pi), (0, np.pi)],
                     n_calls=n_calls,
                     **extra_kwargs)
            ratios.append(float(inst.currentScore) / opt)

    # Compare mean/stdev of % opt. achieved for each algorithm.
    print("-- % of Optimal Achieved, Mean and Std. Dev --")
    for report_format, _, _, ratios in contenders:
        print(report_format % (mean(ratios), stdev(ratios)))
예제 #28
0
def hyperdrive(objective, hyperparameters, results_path, model="GP", n_iterations=50, verbose=False,
               checkpoints_path=None, deadline=None, sampler=None, n_samples=None, random_state=0):
    """
    Distributed optimization - one optimization per node.

    Parameters
    ----------
    * `objective` [function]:
        User defined function which calls a learner
        and returns a metric of interest.

    * `hyperparameters` [list, shape=(n_hyperparameters,)]:
        Passed to `create_hyperspace` (and to `create_hyperbounds` when a
        `sampler` is used); each MPI rank optimizes the entry at its own
        rank index.

    * `results_path` [string]
        Path to save optimization results

    * `model` [string, default="GP"]
        Probabilistic learner used to model our objective function.
        Options:
        - "GP": Gaussian process
        - "RF": Random forest
        - "GBRT": Gradient boosted regression trees
        - "RAND": Random search

    * `n_iterations` [int, default=50]
        Number of optimization iterations

    * `verbose` [bool, default=False]
        Verbosity of optimization. Only applied on rank 0.

    * `checkpoints_path` [string, default=None]
        Path to previously saved results. Used to resume the optimization;
        when given, a checkpoint is also saved at each step.
        Cannot be combined with `sampler`.

    * `deadline` [int, optional]
        Deadline (seconds) for the optimization to finish within.

    * `sampler` [str, default=None]
        Random sampling scheme for optimizer's initial runs.
        Options:
        - "lhs": latin hypercube sampling

    * `n_samples` [int, default=None]
        Number of random samples to be drawn from the `sampler`.
        - Required if you would like to use `sampler`.
        - Must be <= the number of elements in the smallest hyperparameter bound's set.

    * `random_state` [int, default=0]
        Random state for reproducibility.

    Raises
    ------
    ValueError
        If both `checkpoints_path` and `sampler` are given, if `sampler` is
        given without `n_samples`, or if `model` is not a supported name.
    """
    # Total number of points the model-based optimizers start from; initial
    # points supplied by a sampler or a checkpoint count towards it.
    n_initial = 10

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if checkpoints_path and sampler:
        raise ValueError('Cannot use both a restart from a previous run and ' \
                         'use latin hypercube sampling for initial search points!')

    # Setup savefile; the rank is zero-padded to two digits so that results
    # are sorted by rank.
    filename = f'hyperspace{rank:02d}'
    savefile = os.path.join(results_path, filename)

    # Create hyperspaces, and either sampling bounds or checkpoints
    hyperspace = create_hyperspace(hyperparameters)
    space = hyperspace[rank]

    init_points = None
    init_response = None

    # Latin hypercube sampling
    if sampler and not n_samples:
        raise ValueError(f'Sampler requires n_samples > 0. Got {n_samples}')
    elif sampler and n_samples:
        hyperbounds = create_hyperbounds(hyperparameters)
        bounds = hyperbounds[rank]
        # Get initial points in domain via latin hypercube sampling
        init_points = lhs_start(bounds, n_samples)

    # Resuming from checkpoint
    if checkpoints_path:
        checkpoint = _load_checkpoint(checkpoints_path, rank)
        try:
            init_points = checkpoint.x_iters
            init_response = checkpoint.func_vals
        except AttributeError:
            # Missing saves won't have initial values.
            init_points = None
            init_response = None

    if init_points is None:
        n_rand = n_initial
    else:
        # Clamp at zero: with more than `n_initial` supplied points the
        # plain difference would become a negative number of random starts.
        n_rand = max(0, n_initial - len(init_points))

    callbacks = []
    if deadline:
        deadline = DeadlineStopper(deadline)
        callbacks.append(deadline)

    if checkpoints_path:
        checkpoint_callback = CheckpointSaver(checkpoints_path, filename)
        callbacks.append(checkpoint_callback)

    # Keyword arguments shared by every optimizer.
    common = dict(n_calls=n_iterations, callback=callbacks,
                  x0=init_points, y0=init_response,
                  random_state=random_state)
    # Verbose mode should only run on node 0.
    if verbose and rank == 0:
        common['verbose'] = verbose

    if model == "GP":
        result = gp_minimize(objective, space, n_random_starts=n_rand, **common)
    elif model == "RF":
        result = forest_minimize(objective, space, n_random_starts=n_rand, **common)
    elif model == "GBRT":
        result = gbrt_minimize(objective, space, n_random_starts=n_rand, **common)
    elif model == "RAND":
        # Random search has no notion of random starts.
        result = dummy_minimize(objective, space, **common)
    else:
        raise ValueError("Invalid model {}. Read the documentation for "
                         "supported models.".format(model))

    # Each worker will independently write their results to disk
    dump(result, savefile)
예제 #29
0
#############################################################################
# The two-dimensional partial dependence plot can look like the true
# objective, but it does not have to. Because the points at which the
# objective function is evaluated are concentrated around the suspected
# minimum, the surrogate model is sometimes not a good representation of the
# objective far away from that minimum.
#
# Random sampling
# ===============
#
# Compare this to a minimizer which picks points at random. There is no
# structure visible in the order in which it evaluates the objective. Because
# there is no model involved in the process of picking sample points at
# random, we can not plot the partial dependence of the model.

# Random search on branin with a fixed seed; `bounds` and `n_calls` are
# defined outside this excerpt.
dummy_res = dummy_minimize(branin, bounds, n_calls=n_calls, random_state=4)

# The return value is bound to `_` - presumably only to keep it from being
# echoed as cell output.
_ = plot_evaluations(dummy_res, bins=10)

#############################################################################
# Working in six dimensions
# =========================
#
# Visualising what happens in two dimensions is easy.
# :class:`plots.plot_evaluations` and :class:`plots.plot_objective` become
# really useful once the number of dimensions grows. They take care of many
# of the more mundane things needed to make good plots of all combinations
# of the dimensions.
#
# The next example uses :class:`benchmarks.hart6`, which has six dimensions,
# and shows both :class:`plots.plot_evaluations` and
# :class:`plots.plot_objective`.
예제 #30
0
def test_timer_callback():
    """Check that TimerCallback records one positive-sum timing per call."""
    n_calls = 10
    timer = TimerCallback()
    dummy_minimize(bench1, [(-1.0, 1.0)], callback=timer, n_calls=n_calls)

    # One entry per objective evaluation ...
    assert_equal(len(timer.iter_time), n_calls)
    # ... and the accumulated time must be strictly positive.
    assert_less(0.0, sum(timer.iter_time))
예제 #31
0
def get_random_samples(_):
    """Run a random search over the module-level ``space``.

    The single positional argument is ignored; the search is configured
    entirely by the module-level ``score_config``, ``space``,
    ``n_initial_random_samples`` and ``seed``.

    Returns
    -------
    The result object returned by ``dummy_minimize``.
    """
    search_options = {
        "n_calls": n_initial_random_samples,
        "random_state": seed,
    }
    return dummy_minimize(score_config, space, **search_options)
예제 #32
0
                tournament_fraction=0.2,
                mutation_rate=0.05):
    """
    Use the GeneticOptimizer here
    """
    optimizer = GeneticOptimizer(dimensions=dimensions,
                                 n_random_starts=n_random_starts,
                                 tournament_fraction=tournament_fraction,
                                 mutation_rate=mutation_rate)

    for i in range(n_calls):
        x = optimizer.ask()
        y = func(x)
        optimizer.tell(x, y)

    return create_result(optimizer.Xi, optimizer.yi, dimensions)


if __name__ == "__main__":
    from skopt import dummy_minimize
    from skopt.benchmarks import branin

    # Two real-valued dimensions searched by every minimizer below.
    space = [Real(-5, 10), Real(0, 15)]

    # Run all three minimizers with the same budget before printing anything,
    # then report the best objective value found by each, in the same order.
    minimizers = (ga_minimize, dummy_minimize, rs_minimize)
    outcomes = [minimize(branin, space, n_calls=64) for minimize in minimizers]

    for outcome in outcomes:
        print(outcome.fun)
예제 #33
0
def area_limitada(new_df, space_xgb, get_bairro=0):
    """Train XGBoost inside a lat/long box and score it on rows outside it.

    Rows of ``new_df`` whose ``latitude``/``longitude`` fall strictly inside a
    fixed bounding box form the training set; every other row forms the test
    set. Hyper-parameters are picked by ``skopt.dummy_minimize`` (5 calls,
    2-fold shuffled cross-validation on mean squared error), the model is
    refit on the whole training set and evaluated on the test set.

    Parameters
    ----------
    new_df : pandas.DataFrame
        Must contain the columns 'id', 'address', 'tower_name',
        'point_estimate', 'maximum_estimate', 'minimum_estimate', 'latitude'
        and 'longitude'; 'bairro' is also accessed when ``get_bairro`` is
        1 or 2.
    space_xgb : list
        Search space handed to ``dummy_minimize``. Its dimensions are read in
        this order: learning_rate, n_estimators, max_depth, min_child_weight,
        gamma, subsample, colsample_bytree.
    get_bairro : int, optional
        How the location ('bairro') variable is handled:
        0 - use the location variable unchanged (passed through ``bairro``);
        1 - blank out roughly 10% of the training values of the location
            variable, then pass it through ``bairro``;
        2 - do not use the location variable (the column is dropped).
        Any other value leaves the features untouched.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns 'RMSPE', 'MAPE', 'MAE' and 'RMSQ'
        ('RMSQ' holds the square root of the mean squared error).
    """
    from skopt import dummy_minimize
    from sklearn.model_selection import cross_val_score, KFold
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    import xgboost as xgb

    n_calls_hyp = 5
    # Identifier, target and coordinate columns that must not be features.
    non_feature_columns = [
        'id', 'address', 'tower_name', 'point_estimate', 'maximum_estimate',
        'minimum_estimate', 'latitude', 'longitude'
    ]
    # Order in which the entries of a parameter vector are interpreted.
    param_names = [
        'learning_rate', 'n_estimators', 'max_depth', 'min_child_weight',
        'gamma', 'subsample', 'colsample_bytree'
    ]

    # Build the bounding-box mask once; train = inside, test = everything else.
    inside_box = ((new_df['latitude'] > -23.5884) &
                  (new_df['latitude'] < -23.5495) &
                  (new_df['longitude'] > -46.6817) &
                  (new_df['longitude'] < -46.6379))
    train_limit = new_df[inside_box]
    test_limit = new_df[~inside_box]

    X_train = train_limit.drop(non_feature_columns, axis=1)
    X_test = test_limit.drop(non_feature_columns, axis=1)

    # `bairro` receives the two-column target frame, so narrow to
    # 'point_estimate' only after the location handling below.
    y_train = train_limit[['point_estimate', 'maximum_estimate']]
    y_test = test_limit[['point_estimate', 'maximum_estimate']]

    if get_bairro == 0:
        X_train, X_test = bairro(X_train, X_test, y_train, train_rf=False)
    elif get_bairro == 1:
        n_blank = int(np.round(len(X_train) * 0.1))
        blank_index = X_train['bairro'].sample(n_blank).index
        # np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
        X_train.loc[blank_index, 'bairro'] = np.nan
        X_train, X_test = bairro(X_train, X_test, y_train, train_rf=False)
    elif get_bairro == 2:
        X_train = X_train.drop('bairro', axis=1)
        X_test = X_test.drop('bairro', axis=1)

    y_train, y_test = y_train['point_estimate'], y_test['point_estimate']

    cv_inner = KFold(n_splits=2, shuffle=True)

    def build_model(params):
        """Create an XGBRegressor from a positional parameter vector."""
        return xgb.XGBRegressor(**dict(zip(param_names, params)))

    def cv_mse(params):
        """Objective: mean cross-validated MSE on the training set."""
        scores = cross_val_score(build_model(params),
                                 X_train,
                                 y_train,
                                 cv=cv_inner,
                                 scoring="neg_mean_squared_error")
        # The scorer is negated MSE; flip the sign so lower is better.
        return -np.mean(scores)

    resultado_xgb = dummy_minimize(cv_mse,
                                   space_xgb,
                                   n_calls=n_calls_hyp,
                                   verbose=1)

    # Refit on the full training set with the best parameters found.
    xgb_reg = build_model(resultado_xgb.x).fit(X_train, y_train)
    xgb_pred = xgb_reg.predict(X_test)

    residual = y_test - xgb_pred
    results_dic = {
        'RMSPE': [np.mean((residual / y_test)**2)**0.5],
        'MAPE': [np.mean(np.abs(residual) / y_test)],
        'MAE': [mean_absolute_error(y_test, xgb_pred)],
        'RMSQ': [mean_squared_error(y_test, xgb_pred)**0.5]
    }
    return pd.DataFrame(results_dic)
예제 #34
0
def check_minimize(func, y_opt, dimensions, margin, maxiter):
    """Assert that random search gets within ``margin`` of ``y_opt``.

    Runs ``dummy_minimize`` on ``func`` over ``dimensions`` with the given
    ``maxiter`` and a fixed ``random_state`` of 1, then checks that the best
    value found is strictly less than ``y_opt + margin``.
    """
    outcome = dummy_minimize(func,
                             dimensions,
                             maxiter=maxiter,
                             random_state=1)
    best_found = outcome.fun
    upper_bound = y_opt + margin
    assert_less(best_found, upper_bound)
예제 #35
0
    REND = 0.9

    potencia = REND * (RHO / (2 * SNOZ**2)) * (Hg /
                                               (1 / (2 * G * SNOZ**2) + F * L /
                                                (D**5)))**(3 / 2)
    caudal = (Hg / (1 / (2 * G * SNOZ**2) + F * L / D**5))**(1 / 2)
    coste = (L + 50 * sum(solucion)) * D**2

    if potencia < 8e3:
        coste = 1000000

    if caudal > 35e-3:
        coste = 1000000

    return coste

# Seed the random search with one individual built by `crea_individuo`, over
# 205 dimensions that each range from 0 to 1.
x0 = crea_individuo()
res = dummy_minimize(fitness_function_single, [(0, 1) for i in range(205)],
                     x0=[x0])

# Dump every evaluated point and its objective value, one per line. Both
# files are opened up front (as before) and the context manager guarantees
# they are closed even if a write raises.
with open("individuos.txt", "w") as fichero, \
        open("fitness.txt", "w") as fichero2:
    for i in res.x_iters:
        fichero.write(str(i))
        fichero.write(str("\n"))

    for j in res.func_vals:
        fichero2.write(str(j))
        fichero2.write("\n")
 # Split the command line into recognised flags and leftover arguments
 # (presumably `parser` is an argparse parser defined above - confirm).
 FLAGS, unparsed = parser.parse_known_args()
 # Five-dimensional search space. The two `log_*` dimensions are exponents:
 # they are reported as 10**value when the best point is printed below.
 boundsOpt = [
     Real(-8, -1, name='log_init_lr'),
     Real(-8, -1, name='log_l2_r'),
     Categorical([512, 256], name='batch_size'),
     Categorical(['relu', 'tanh', 'softsign'], name='activation'),
     Real(0, 0.8, 'uniform', name='dropout_rate'),
 ]
 # Wall-clock start of the search; paired with t2 below.
 t1 = time.time()
 # NOTE(review): the checkpoint goes to an absolute, machine-specific Windows
 # path. Presumably the callback pickles intermediate results there - confirm.
 checkpoint_saver = CheckpointSaver(
     'C:/behrouz/projects/behrouz-Rui-Gaurav-project/excel-pbi-modeling/'
     'imbalanced_batch/randomSearch_checkpoint.pkl')
 # Random search: 200 calls of `run_model` over `boundsOpt`.
 results = dummy_minimize(
     run_model,
     boundsOpt,
     n_calls=200,
     random_state=None,  # set it for reproducible results
     verbose=True,
     callback=[checkpoint_saver])
 t2 = time.time()
 # Report the best point; results.x follows the order of `boundsOpt`.
 print('-' * 100)
 print('Best parameters Obtained:')
 print('Minimum Validation Loss obtained=', results.fun)
 print('Learning Rate:', 10**results.x[0], 'l2:', 10**results.x[1],
       'Batch size:', results.x[2], 'activation= ', results.x[3],
       'Dropout= ', results.x[4])
 print('-' * 100)
 # Elapsed time converted from seconds to minutes, rounded to 2 decimals.
 print('Total Time(min) for optimization= ', round((t2 - t1) / 60, 2))
 # Plot the recorded objective values against their index.
 from matplotlib import pyplot as plt
 plt.plot(results.func_vals)
 plt.ylabel('Validation Loss')
예제 #37
0
def get_plan(funcs,
             norm,
             goals=None,
             solver='gp_minimize',
             n_calls=25,
             random_state=None,
             n_initial_points=10,
             verbose=True):
    """Hyperparameter optimization for RayStation treatment planning.

    Tunes the treatment plan parameters with one of the following
    scikit-optimize solvers:

        - `gp_minimize`: Bayesian optimization using Gaussian processes.
        - `forest_minimize`: Sequential optimization using decision
           trees.
        - `dummy_minimize`: Random search by uniform sampling within the
           given bounds.

    For more details about scikit-optimize, refer to
    https://scikit-optimize.github.io/stable/index.html

    Parameters
    ----------
    funcs : str
        Path to CSV with constituent function specifications. The path
        with its last nine characters removed is used as the prefix for
        the checkpoint file and is passed on to the objective.
    norm : (str, float, float)
        Region of interest, dose, and volume used for normalization.
    goals : pandas.DataFrame or str, optional
        Path to CSV with clinical goal specifications.
        If None, goals are assigned based on constituent functions.
    solver : {'gp_minimize', 'forest_minimize', 'dummy_minimize'}, optional
        Name of scikit-optimize solver to use. Any value other than
        'forest_minimize' or 'dummy_minimize' selects `gp_minimize`.
    n_calls : int, optional
        Number of calls to objective.
    random_state : int, optional
        Set random state for reproducible results.
    n_initial_points : int, optional
        Number of random function evaluations before function
        approximation. Not passed to `dummy_minimize`.
    verbose : bool, optional
        Control the verbosity of the solver.

    Returns
    -------
    raybay.RaybayResult
        RayStation treatment plan results.

    """
    # Handles to the currently open RayStation objects
    patient = connect.get_current('Patient')
    case = connect.get_current('Case')
    plan = connect.get_current('Plan')
    beam_set = connect.get_current('BeamSet')

    # Container that accumulates everything produced by this run
    result = raybay.RaybayResult(patient.Name, case.CaseName, plan.Name, funcs,
                                 norm, goals, solver)

    # `funcs` minus its nine-character suffix doubles as an output prefix
    path_prefix = funcs[:-9]

    def obj(pars):
        return objective(plan, beam_set, result, path_prefix, pars)

    checkpoint_saver = skopt.callbacks.CheckpointSaver(
        path_prefix + 'checkpoint.pkl', store_objective=False)

    start_time = time()

    # Arguments shared by all three solvers
    solver_kwargs = dict(dimensions=get_dims(result.func_df),
                         n_calls=n_calls,
                         random_state=random_state,
                         verbose=verbose,
                         callback=[checkpoint_saver])
    if solver == 'dummy_minimize':
        minimize = skopt.dummy_minimize
    else:
        # Only the model-based solvers are given n_initial_points
        solver_kwargs['n_initial_points'] = n_initial_points
        if solver == 'forest_minimize':
            minimize = skopt.forest_minimize
        else:
            minimize = skopt.gp_minimize
    result.opt_result = minimize(obj, **solver_kwargs)

    # Remove the local objective function from the specs to allow pickling
    result.opt_result.specs['args']['func'] = 'local'
    result.time = time() - start_time

    # Re-apply the best parameters and compute the dose-volume histogram
    set_pars(plan, result.func_df, result.opt_result.x)
    calc_plan(plan, beam_set, result.norm)
    result.dvh_dict = get_dvh(result.roi_list)

    return result