Example #1
 def run_exp_1(self):
     np.random.seed(162)
     search_alg, cost = self.set_basic_conf()
     search_alg = ConcurrencyLimiter(search_alg, 1)
     results_exp_1 = tune.run(
         cost, num_samples=5, search_alg=search_alg, verbose=0)
     self.log_dir = os.path.join(self.tmpdir, "warmStartTest.pkl")
     search_alg.save(self.log_dir)
     return results_exp_1
Example #2
 def run_explicit_restore(self, random_state, checkpoint_path):
     np.random.set_state(random_state)
     search_alg2, cost = self.set_basic_conf()
     search_alg2 = ConcurrencyLimiter(search_alg2, 1)
     search_alg2.restore(checkpoint_path)
     return tune.run(cost,
                     num_samples=5,
                     search_alg=search_alg2,
                     scheduler=self.get_scheduler(),
                     verbose=0)
Example #3
 def run_from_experiment_restore(self, random_state):
     search_alg, cost = self.set_basic_conf()
     search_alg = ConcurrencyLimiter(search_alg, 1)
     search_alg.restore_from_dir(
         os.path.join(self.tmpdir, self.experiment_name))
     results = tune.run(cost,
                        num_samples=5,
                        search_alg=search_alg,
                        verbose=0,
                        name=self.experiment_name,
                        local_dir=self.tmpdir)
     return results
Example #4
 def run_part_from_scratch(self):
     np.random.seed(162)
     search_alg, cost = self.set_basic_conf()
     search_alg = ConcurrencyLimiter(search_alg, 1)
     results_exp_1 = tune.run(cost,
                              num_samples=5,
                              search_alg=search_alg,
                              verbose=0,
                              name=self.experiment_name,
                              local_dir=self.tmpdir)
     checkpoint_path = os.path.join(self.tmpdir, "warmStartTest.pkl")
     search_alg.save(checkpoint_path)
     return results_exp_1, np.random.get_state(), checkpoint_path
Example #5
 def run_from_experiment_restore(self, random_state):
     search_alg, cost = self.set_basic_conf()
     if not isinstance(search_alg, ConcurrencyLimiter):
         search_alg = ConcurrencyLimiter(search_alg, 1)
     search_alg.restore_from_dir(
         os.path.join(self.tmpdir, self.experiment_name))
     results = tune.run(cost,
                        num_samples=5,
                        search_alg=search_alg,
                        scheduler=self.get_scheduler(),
                        verbose=0,
                        name=self.experiment_name,
                        local_dir=self.tmpdir,
                        reuse_actors=True)
     return results
Example #6
def run_blendsearch_tune_w_budget(time_budget_s=10):
    """run BlendSearch with given time_budget_s"""
    algo = BlendSearch(
        metric="mean_loss",
        mode="min",
        space={
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            "activation": tune.choice(["relu", "tanh"]),
        },
    )
    algo.set_search_properties(config={"time_budget_s": time_budget_s})
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        time_budget_s=time_budget_s,
        num_samples=-1,
        config={
            "steps": 100,
        },
    )

    print("Best hyperparameters found were: ", analysis.best_config)
Example #7
def set_algorithm(experiment_name, config):
    '''
    Configure search algorithm.
    '''
    if args.algorithm == 'hyperopt':
        algorithm = HyperOptSearch(points_to_evaluate=best_params)
    elif args.algorithm == 'ax':
        ax_client = AxClient(enforce_sequential_optimization=False)
        ax_client.create_experiment(name=experiment_name,
                                    parameters=config,
                                    objective_name="minimum",
                                    minimize=True)
        algorithm = AxSearch(ax_client=ax_client,
                             points_to_evaluate=best_params)
    elif args.algorithm == 'nevergrad':
        algorithm = NevergradSearch(
            points_to_evaluate=best_params,
            optimizer=ng.optimizers.registry["PortfolioDiscreteOnePlusOne"])
    elif args.algorithm == 'optuna':
        algorithm = OptunaSearch(points_to_evaluate=best_params,
                                 seed=args.seed)
    elif args.algorithm == 'pbt':
        algorithm = PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=args.perturbation,
            hyperparam_mutations=config,
            synch=True)
    elif args.algorithm == 'random':
        algorithm = BasicVariantGenerator(max_concurrent=args.jobs)
    if args.algorithm not in ['random', 'pbt']:
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=args.jobs)
    return algorithm
Example #8
 def _get_search_algorithm(
     self, search_algorithm, config_space, metric, mode, max_concurrent):
     if search_algorithm == "BO":
         algo = BayesOptSearch(
             utility_kwargs={
                 "kind": "ucb",
                 "kappa": 2.5,
                 "xi": 0.0
             })
         algo = ConcurrencyLimiter(algo, max_concurrent=max_concurrent)
         scheduler = AsyncHyperBandScheduler()
     elif search_algorithm == "BOHB":
         experiment_metrics = dict(metric=metric, mode=mode)
         algo = TuneBOHB(
             config_space, max_concurrent=max_concurrent, **experiment_metrics)
         scheduler = HyperBandForBOHB(
             time_attr="training_iteration",
             reduction_factor=4)
     elif search_algorithm == "PBT":
         # Caveat with PBT: it mutates parameter values, so it can sometimes generate unacceptable ones
         algo = None
         scheduler = PopulationBasedTraining(
             time_attr='training_iteration',
             perturbation_interval=2,  # Every N time_attr units, "perturb" the parameters.
             hyperparam_mutations=config_space)
     elif search_algorithm == "GRID" or search_algorithm == "RANDOM":
         algo = None
         scheduler = None
     else:
         raise Exception(search_algorithm, "is not available yet")
     return algo, scheduler
Example #9
    def set_basic_conf(self):
        space_config = [
            {
                "name": "width",
                "type": "num",
                "lb": 0,
                "ub": 20
            },
            {
                "name": "height",
                "type": "num",
                "lb": -100,
                "ub": 100
            },
        ]
        space = HEBODesignSpace().parse(space_config)

        def cost(param, reporter):
            reporter(loss=(param["height"] - 14)**2 - abs(param["width"] - 3))

        search_alg = HEBOSearch(space=space,
                                metric="loss",
                                mode="min",
                                random_state_seed=5)
        # This is done on purpose to speed up the test, as HEBO will
        # cache suggestions
        search_alg = ConcurrencyLimiter(search_alg, max_concurrent=10)
        return search_alg, cost
Example #10
File: example.py Project: sonichi/FLAML
def test_blendsearch_tune(smoke_test=True):
    try:
        from ray import tune
        from ray.tune.suggest import ConcurrencyLimiter
        from ray.tune.schedulers import AsyncHyperBandScheduler
        from ray.tune.suggest.flaml import BlendSearch
    except ImportError:
        print("ray[tune] is not installed, skipping test")
        return
    import numpy as np

    algo = BlendSearch()
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        num_samples=10 if smoke_test else 100,
        config={
            "steps": 100,
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            # This is an ignored parameter.
            "activation": tune.choice(["relu", "tanh"]),
            "test4": np.zeros((3, 1)),
        },
    )

    print("Best hyperparameters found were: ", analysis.best_config)
Example #11
    def set_basic_conf(self):
        from dragonfly.opt.gp_bandit import EuclideanGPBandit
        from dragonfly.exd.experiment_caller import EuclideanFunctionCaller
        from dragonfly import load_config

        def cost(space, reporter):
            height, width = space["point"]
            reporter(loss=(height - 14)**2 - abs(width - 3))

        domain_vars = [{
            "name": "height",
            "type": "float",
            "min": -10,
            "max": 10
        }, {
            "name": "width",
            "type": "float",
            "min": 0,
            "max": 20
        }]

        domain_config = load_config({"domain": domain_vars})

        func_caller = EuclideanFunctionCaller(
            None, domain_config.domain.list_of_domains[0])
        optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)
        search_alg = DragonflySearch(
            optimizer, metric="loss", mode="min", random_state_seed=162)
        search_alg = ConcurrencyLimiter(search_alg, max_concurrent=1000)
        return search_alg, cost
Example #12
    def test_convergence_gaussian_process(self):
        np.random.seed(0)
        ray.init(local_mode=True, num_cpus=1, num_gpus=1)

        space = {
            "x": (0, 20)  # This is the space of parameters to explore
        }

        resources_per_trial = {"cpu": 1, "gpu": 0}

        # Following bayesian optimization
        gp = BayesOptSearch(space,
                            metric="loss",
                            mode="min",
                            random_search_steps=10)
        gp.repeat_float_precision = 5
        gp = ConcurrencyLimiter(gp, 1)

        # Execution of the BO.
        analysis = tune.run(
            loss,
            # stop=EarlyStopping("loss", mode="min", patience=5),
            search_alg=gp,
            config={},
            num_samples=100,  # Number of iterations
            resources_per_trial=resources_per_trial,
            raise_on_failed_trial=False,
            fail_fast=True,
            verbose=1)
        assert len(analysis.trials) == 41

        ray.shutdown()
Example #13
    def test_convergence_gaussian_process(self):
        np.random.seed(0)
        ray.init(local_mode=True, num_cpus=1, num_gpus=1)

        # This is the space of parameters to explore
        space = {"x": tune.uniform(0, 20)}

        resources_per_trial = {"cpu": 1, "gpu": 0}

        # Following bayesian optimization
        gp = BayesOptSearch(random_search_steps=10)
        gp.repeat_float_precision = 5
        gp = ConcurrencyLimiter(gp, 1)

        # Execution of the BO.
        analysis = tune.run(
            loss,
            metric="loss",
            mode="min",
            # stop=EarlyStopping("loss", mode="min", patience=5),
            search_alg=gp,
            config=space,
            num_samples=100,  # Number of iterations
            resources_per_trial=resources_per_trial,
            raise_on_failed_trial=False,
            fail_fast=True,
            verbose=1)
        assert len(analysis.trials) in {13, 40, 43}  # it is 43 on the cluster?
        assert math.isclose(analysis.best_config["x"], 0, abs_tol=1e-5)
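The two convergence tests above optimize a loss trainable that is not shown. A minimal sketch consistent with their assertions (optimum at x ≈ 0) is a simple quadratic; the exact form is an assumption:

from ray import tune

def loss(config):
    # Hypothetical quadratic objective: its minimum at x = 0 is consistent with
    # the assertion math.isclose(analysis.best_config["x"], 0, abs_tol=1e-5) above.
    x = config["x"]
    tune.report(loss=x ** 2)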
Example #14
 def run_exp_3(self):
     print("FULL RUN")
     np.random.seed(162)
     search_alg3, cost = self.set_basic_conf()
     search_alg3 = ConcurrencyLimiter(search_alg3, 1)
     return tune.run(
         cost, num_samples=10, search_alg=search_alg3, verbose=0)
Example #15
def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    session_dirpath = make_get_filepath(os.path.join('reports', backtest_config['exchange'], backtest_config['symbol'],
                                                     f"{n_days}_days_{ts_to_date(time())[:19].replace(':', '')}", ''))
    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')
    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')
    n_particles = 10
    if 'n_particles' in backtest_config:
        n_particles = backtest_config['n_particles']
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']
    current_best_params = []
    if current_best:
        if type(current_best) == list:
            for c in current_best:
                c = clean_start_config(c, config, backtest_config['ranges'])
                current_best_params.append(c)
        else:
            current_best = clean_start_config(current_best, config, backtest_config['ranges'])
            current_best_params.append(current_best)

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    pso = ng.optimizers.ConfiguredPSO(transform='identity', popsize=n_particles, omega=omega, phip=phi1, phig=phi2)
    algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    analysis = tune.run(tune.with_parameters(backtest, ticks=ticks), metric='objective', mode='max', name='search',
                        search_alg=algo, scheduler=scheduler, num_samples=iters, config=config, verbose=1,
                        reuse_actors=True, local_dir=session_dirpath,
                        progress_reporter=LogReporter(metric_columns=['daily_gain', 'closest_liquidation', 'objective'],
                                                      parameter_columns=[k for k in backtest_config['ranges']]))

    ray.shutdown()
    df = analysis.results_df
    df.reset_index(inplace=True)
    df.drop(columns=['trial_id', 'time_this_iter_s', 'done', 'timesteps_total', 'episodes_total', 'training_iteration',
                     'experiment_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip',
                     'time_since_restore', 'timesteps_since_restore', 'iterations_since_restore', 'experiment_tag'],
            inplace=True)
    df.to_csv(os.path.join(backtest_config['session_dirpath'], 'results.csv'), index=False)
    print('Best candidate found:')
    pprint.pprint(analysis.best_config)
    plot_wrap(backtest_config, ticks, clean_result_config(analysis.best_config))
    return analysis
Example #16
def _test_flaml_raytune_consistency(num_samples=-1,
                                    max_concurrent_trials=1,
                                    searcher_name="cfo"):
    try:
        from ray import tune as raytune
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
        )
        return
    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    analysis = tune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,  # the search space
        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        time_budget_s=None,  # the time budget in seconds
        local_dir="logs/",  # the local directory to store logs
        search_alg=searcher,
        # verbose=0,          # verbosity
        # use_ray=True, # uncomment when performing parallel tuning using ray
    )
    flaml_best_config = analysis.best_config
    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("best flaml", searcher_name, flaml_best_config)  # the best config
    print("flaml config in results", searcher_name, flaml_config_in_results)

    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    from ray.tune.suggest import ConcurrencyLimiter

    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
    analysis = raytune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        local_dir="logs/",  # the local directory to store logs
        # max_concurrent_trials=max_concurrent_trials,
        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
        search_alg=search_alg,
    )
    ray_best_config = analysis.best_config
    ray_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("ray best", searcher_name, analysis.best_config)  # the best config
    print("ray config in results", searcher_name, ray_config_in_results)
    assert ray_best_config == flaml_best_config, "best config should be the same"
    assert (flaml_config_in_results == ray_config_in_results
            ), "results from raytune and flaml should be the same"
Example #17
 def run_full(self):
     np.random.seed(162)
     search_alg3, cost = self.set_basic_conf()
     search_alg3 = ConcurrencyLimiter(search_alg3, 1)
     return tune.run(cost,
                     num_samples=10,
                     search_alg=search_alg3,
                     scheduler=self.get_scheduler(),
                     verbose=0)
Example #18
def backtest_tune(ohlc: np.ndarray, backtest_config: dict):
    config = create_config(backtest_config)
    if not os.path.isdir(os.path.join('reports', backtest_config['symbol'])):
        os.makedirs(os.path.join('reports', backtest_config['symbol']),
                    exist_ok=True)
    report_path = os.path.join('reports', backtest_config['symbol'])
    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print(
            'Parameter iters should be defined in the configuration. Defaulting to 10.'
        )
    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print(
            'Parameter num_cpus should be defined in the configuration. Defaulting to 2.'
        )

    initial_points = max(1, min(int(iters / 10), 20))

    ray.init(num_cpus=num_cpus)  # , logging_level=logging.FATAL, log_to_driver=False

    algo = HyperOptSearch(n_initial_points=initial_points)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    analysis = tune.run(tune.with_parameters(backtest, ohlc=ohlc),
                        metric='objective',
                        mode='max',
                        name='search',
                        search_alg=algo,
                        scheduler=scheduler,
                        num_samples=iters,
                        config=config,
                        verbose=1,
                        reuse_actors=True,
                        local_dir=report_path)

    ray.shutdown()
    session_path = os.path.join(
        os.path.join('sessions', backtest_config['symbol']),
        backtest_config['session_name'])
    if not os.path.isdir(session_path):
        os.makedirs(session_path, exist_ok=True)

    print('Best candidate found is: ', analysis.best_config)
    json.dump(analysis.best_config,
              open(os.path.join(session_path, 'best_config.json'), 'w'),
              indent=4)
    result = backtest(analysis.best_config, ohlc, True)
    result.to_csv(os.path.join(session_path, 'best_trades.csv'), index=False)
    return analysis
Example #19
def optimize_hyperparameters(
    train_model,
    create_model,
    data_train,
    data_test,
    search_space,
    model_kwargs_str,
    callbacks,
    hyperparams_file_name,
    random_seed,
    model_path,
    epochs,
    n_steps,
    num_samples_optim,
):
    tmp_dir = tempfile.TemporaryDirectory(dir=os.getcwd())

    ray.shutdown()
    ray.init(log_to_driver=False, local_mode=True)

    search_alg = HyperOptSearch(random_state_seed=random_seed)
    search_alg = ConcurrencyLimiter(search_alg, max_concurrent=1)
    scheduler = AsyncHyperBandScheduler(time_attr="training_iteration",
                                        grace_period=10)

    analysis = tune.run(
        tune.with_parameters(
            train_model,
            data_train=data_train,
            data_test=data_test,
            create_model=create_model,
            model_kwargs_str=model_kwargs_str,
            callbacks=callbacks,
            epochs=epochs,
            n_steps=n_steps,
        ),
        verbose=1,
        config=search_space,
        search_alg=search_alg,
        scheduler=scheduler,
        resources_per_trial={
            "cpu": os.cpu_count(),
            "gpu": 0
        },
        metric="val_loss",
        mode="min",
        name="ray_tune_keras_hyperopt_gru",
        local_dir=tmp_dir.name,
        num_samples=num_samples_optim,
    )

    shutil.rmtree(tmp_dir.name)  # tmp_dir is a TemporaryDirectory object; remove its underlying path

    best_params = analysis.get_best_config(metric="val_loss", mode="min")
    with open(os.path.join(model_path, hyperparams_file_name), "w") as f:
        json.dump(best_params, f)
Example #20
 def testBootStrapAnalysis(self):
     analysis = self.run_full()
     search_alg3, cost = self.set_basic_conf(analysis)
     if not isinstance(search_alg3, ConcurrencyLimiter):
         search_alg3 = ConcurrencyLimiter(search_alg3, 1)
     tune.run(cost,
              num_samples=10,
              search_alg=search_alg3,
              verbose=0,
              reuse_actors=True)
Example #21
def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    session_dirpath = make_get_filepath(os.path.join('reports', backtest_config['exchange'], backtest_config['symbol'],
                                                     f"{n_days}_days_{ts_to_date(time())[:19].replace(':', '')}", ''))
    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')
    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')
    n_particles = 10
    if 'n_particles' in backtest_config:
        n_particles = backtest_config['n_particles']
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']
    current_best_params = []
    if current_best:
        if type(current_best) == list:
            for c in current_best:
                c = clean_start_config(c, config, backtest_config['ranges'])
                current_best_params.append(c)
        else:
            current_best = clean_start_config(current_best, config, backtest_config['ranges'])
            current_best_params.append(current_best)

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    pso = ng.optimizers.ConfiguredPSO(transform='identity', popsize=n_particles, omega=omega, phip=phi1, phig=phi2)
    algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    analysis = tune.run(tune.with_parameters(wrap_backtest, ticks=ticks), metric='objective', mode='max', name='search',
                        search_alg=algo, scheduler=scheduler, num_samples=iters, config=config, verbose=1,
                        reuse_actors=True, local_dir=session_dirpath,
                        progress_reporter=LogReporter(metric_columns=['daily_gain',
                                                                      'closest_liquidation',
                                                                      'max_hours_between_fills',
                                                                      'objective'],
                                                      parameter_columns=[k for k in backtest_config['ranges'] if type(
                                                          config[k]) == ray.tune.sample.Float or type(
                                                          config[k]) == ray.tune.sample.Integer]))

    ray.shutdown()
    return analysis
Example #22
 def run_full(self):
     np.random.seed(162)
     search_alg3, cost = self.set_basic_conf()
     if not isinstance(search_alg3, ConcurrencyLimiter):
         search_alg3 = ConcurrencyLimiter(search_alg3, 1)
     return tune.run(cost,
                     num_samples=10,
                     search_alg=search_alg3,
                     scheduler=self.get_scheduler(),
                     verbose=0,
                     reuse_actors=True)
Example #23
def run_hyperopt_tune(config_dict=config_space, smoke_test=False):
    algo = HyperOptSearch(space=config_dict, metric="mean_loss", mode="min")
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        num_samples=10 if smoke_test else 100,
    )

    print("Best hyperparameters found were: ", analysis.best_config)
Example #24
    def testConvergenceBayesOpt(self):
        from ray.tune.suggest.bayesopt import BayesOptSearch

        np.random.seed(0)

        # Following bayesian optimization
        searcher = BayesOptSearch(random_search_steps=10)
        searcher.repeat_float_precision = 5
        searcher = ConcurrencyLimiter(searcher, 1)

        analysis = self._testConvergence(searcher, patience=100)

        assert len(analysis.trials) < 50
        assert math.isclose(analysis.best_config["x"], 0, abs_tol=1e-5)
Example #25
    def set_basic_conf(self):
        optimizer = skopt.Optimizer([(0, 20), (-100, 100)])
        previously_run_params = [[10, 0], [15, -20]]
        known_rewards = [-189, -1144]

        def cost(space, reporter):
            reporter(loss=(space["height"]**2 + space["width"]**2))

        search_alg = SkOptSearch(optimizer, ["width", "height"],
                                 metric="loss",
                                 mode="min",
                                 points_to_evaluate=previously_run_params,
                                 evaluated_rewards=known_rewards)
        search_alg = ConcurrencyLimiter(search_alg, max_concurrent=1000)
        return search_alg, cost
Example #26
    def set_basic_conf(self):
        instrumentation = 2
        parameter_names = ["height", "width"]
        optimizer = optimizerlib.OnePlusOne(instrumentation)

        def cost(space, reporter):
            reporter(loss=(space["height"] - 14)**2 - abs(space["width"] - 3))

        search_alg = NevergradSearch(
            optimizer,
            parameter_names,
            metric="loss",
            mode="min",
        )
        search_alg = ConcurrencyLimiter(search_alg, max_concurrent=1000)
        return search_alg, cost
Example #27
def run_optuna_tune(smoke_test=False):
    algo = OptunaSearch(metric=["loss", "gain"], mode=["min", "max"])
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    analysis = tune.run(
        easy_objective,
        search_alg=algo,
        num_samples=10 if smoke_test else 100,
        config={
            "steps": 100,
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            # This is an ignored parameter.
            "activation": tune.choice(["relu", "tanh"])
        })

    print("Best hyperparameters for loss found were: ",
          analysis.get_best_config("loss", "min"))
    print("Best hyperparameters for gain found were: ",
          analysis.get_best_config("gain", "max"))
Example #28
def run_optuna_tune(smoke_test=False):
    algo = OptunaSearch()
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        num_samples=10 if smoke_test else 100,
        config={
            "steps": 100,
            "width": tune.uniform(0, 20),
            "height": tune.uniform(-100, 100),
            # This is an ignored parameter.
            "activation": tune.choice(["relu", "tanh"])
        })

    print("Best hyperparameters found were: ", analysis.best_config)
Example #29
    def set_basic_conf(self):
        space = {
            "x": hp.uniform("x", 0, 10),
            "y": hp.uniform("y", -10, 10),
            "z": hp.uniform("z", -10, 0)
        }

        def cost(space, reporter):
            loss = space["x"]**2 + space["y"]**2 + space["z"]**2
            reporter(loss=loss)

        search_alg = HyperOptSearch(
            space,
            metric="loss",
            mode="min",
            random_state_seed=5,
            n_initial_points=1,
        )
        search_alg = ConcurrencyLimiter(search_alg, max_concurrent=1000)
        return search_alg, cost
Example #30
File: execution.py Project: yarenty/ludwig
    def execute(
        self,
        config,
        dataset=None,
        training_set=None,
        validation_set=None,
        test_set=None,
        training_set_metadata=None,
        data_format=None,
        experiment_name="hyperopt",
        model_name="run",
        # model_load_path=None,
        # model_resume_path=None,
        skip_save_training_description=False,
        skip_save_training_statistics=False,
        skip_save_model=False,
        skip_save_progress=False,
        skip_save_log=False,
        skip_save_processed_input=True,
        skip_save_unprocessed_output=False,
        skip_save_predictions=False,
        skip_save_eval_stats=False,
        output_directory="results",
        gpus=None,
        gpu_memory_limit=None,
        allow_parallel_threads=True,
        callbacks=None,
        backend=None,
        random_seed=default_random_seed,
        debug=False,
        **kwargs,
    ) -> RayTuneResults:
        if isinstance(dataset, str) and not has_remote_protocol(dataset) and not os.path.isabs(dataset):
            dataset = os.path.abspath(dataset)

        if isinstance(backend, str):
            backend = initialize_backend(backend)

        if gpus is not None:
            raise ValueError(
                "Parameter `gpus` is not supported when using Ray Tune. "
                "Configure GPU resources with Ray and set `gpu_resources_per_trial` in your "
                "hyperopt config."
            )

        if gpu_memory_limit is None and 0 < self._gpu_resources_per_trial_non_none < 1:
            # Enforce fractional GPU utilization
            gpu_memory_limit = self.gpu_resources_per_trial

        hyperopt_dict = dict(
            config=config,
            dataset=dataset,
            training_set=training_set,
            validation_set=validation_set,
            test_set=test_set,
            training_set_metadata=training_set_metadata,
            data_format=data_format,
            experiment_name=experiment_name,
            model_name=model_name,
            # model_load_path=model_load_path,
            # model_resume_path=model_resume_path,
            eval_split=self.split,
            skip_save_training_description=skip_save_training_description,
            skip_save_training_statistics=skip_save_training_statistics,
            skip_save_model=skip_save_model,
            skip_save_progress=skip_save_progress,
            skip_save_log=skip_save_log,
            skip_save_processed_input=skip_save_processed_input,
            skip_save_unprocessed_output=skip_save_unprocessed_output,
            skip_save_predictions=skip_save_predictions,
            skip_save_eval_stats=skip_save_eval_stats,
            output_directory=output_directory,
            gpus=gpus,
            gpu_memory_limit=gpu_memory_limit,
            allow_parallel_threads=allow_parallel_threads,
            callbacks=callbacks,
            backend=backend,
            random_seed=random_seed,
            debug=debug,
        )

        mode = "min" if self.goal != MAXIMIZE else "max"
        metric = "metric_score"
        if self.search_alg_dict is not None:
            if TYPE not in self.search_alg_dict:
                logger.warning("WARNING: Kindly set type param for search_alg to utilize Tune's Search Algorithms.")
                search_alg = None
            else:
                search_alg_type = self.search_alg_dict[TYPE]
                search_alg = tune.create_searcher(search_alg_type, metric=metric, mode=mode, **self.search_alg_dict)
        else:
            search_alg = None

        if self.max_concurrent_trials:
            assert (
                self.max_concurrent_trials > 0
            ), f"`max_concurrent_trials` must be greater than 0, got {self.max_concurrent_trials}"
            if isinstance(search_alg, BasicVariantGenerator) or search_alg is None:
                search_alg = BasicVariantGenerator(max_concurrent=self.max_concurrent_trials)
            elif isinstance(search_alg, ConcurrencyLimiter):
                raise ValueError(
                    "You have specified `max_concurrent_trials`, but the search "
                    "algorithm is already a `ConcurrencyLimiter`. FIX THIS "
                    "by setting `max_concurrent_trials=None`."
                )
            else:
                search_alg = ConcurrencyLimiter(search_alg, max_concurrent=self.max_concurrent_trials)

        resources_per_trial = {
            "cpu": self._cpu_resources_per_trial_non_none,
            "gpu": self._gpu_resources_per_trial_non_none,
        }

        def run_experiment_trial(config, local_hyperopt_dict, checkpoint_dir=None):
            return self._run_experiment(
                config, checkpoint_dir, local_hyperopt_dict, self.decode_ctx, _is_ray_backend(backend)
            )

        tune_config = {}
        tune_callbacks = []
        for callback in callbacks or []:
            run_experiment_trial, tune_config = callback.prepare_ray_tune(
                run_experiment_trial,
                tune_config,
                tune_callbacks,
            )

        if _is_ray_backend(backend):
            # we can't set Trial actor's CPUs to 0 so we just go very low
            resources_per_trial = PlacementGroupFactory(
                [{"CPU": 0.001}] + ([{"CPU": 1, "GPU": 1}] * self._gpu_resources_per_trial_non_none)
                if self._gpu_resources_per_trial_non_none
                else [{"CPU": 0.001}] + [{"CPU": 1}] * self._cpu_resources_per_trial_non_none
            )

        if has_remote_protocol(output_directory):
            run_experiment_trial = tune.durable(run_experiment_trial)
            self.sync_config = tune.SyncConfig(sync_to_driver=False, upload_dir=output_directory)
            output_directory = None
        elif self.kubernetes_namespace:
            from ray.tune.integration.kubernetes import NamespacedKubernetesSyncer

            self.sync_config = tune.SyncConfig(sync_to_driver=NamespacedKubernetesSyncer(self.kubernetes_namespace))

        run_experiment_trial_params = tune.with_parameters(run_experiment_trial, local_hyperopt_dict=hyperopt_dict)
        register_trainable(f"trainable_func_f{hash_dict(config).decode('ascii')}", run_experiment_trial_params)

        analysis = tune.run(
            f"trainable_func_f{hash_dict(config).decode('ascii')}",
            config={
                **self.search_space,
                **tune_config,
            },
            scheduler=self.scheduler,
            search_alg=search_alg,
            num_samples=self.num_samples,
            keep_checkpoints_num=1,
            max_failures=1,  # retry a trial failure once
            resources_per_trial=resources_per_trial,
            time_budget_s=self.time_budget_s,
            sync_config=self.sync_config,
            local_dir=output_directory,
            metric=metric,
            mode=mode,
            trial_name_creator=lambda trial: f"trial_{trial.trial_id}",
            trial_dirname_creator=lambda trial: f"trial_{trial.trial_id}",
            callbacks=tune_callbacks,
        )

        if "metric_score" in analysis.results_df.columns:
            ordered_trials = analysis.results_df.sort_values("metric_score", ascending=self.goal != MAXIMIZE)

            # Catch nans in edge case where the trial doesn't complete
            temp_ordered_trials = []
            for kwargs in ordered_trials.to_dict(orient="records"):
                for key in ["parameters", "training_stats", "eval_stats"]:
                    if isinstance(kwargs[key], float):
                        kwargs[key] = {}
                temp_ordered_trials.append(kwargs)

            # Trials w/empty eval_stats fields & non-empty training_stats fields ran intermediate
            # tune.report call(s) but were terminated before reporting eval_stats from post-train
            # evaluation (e.g., trial stopped due to time budget or relatively poor performance.)
            # For any such trials, run model evaluation for the best model in that trial & record
            # results in ordered_trials which is returned & is persisted in hyperopt_statistics.json.
            for trial in temp_ordered_trials:
                if trial["eval_stats"] == "{}" and trial["training_stats"] != "{}":
                    # Evaluate the best model on the eval_split, which is validation_set
                    if validation_set is not None and validation_set.size > 0:
                        trial_path = trial["trial_dir"]
                        best_model_path = self._get_best_model_path(trial_path, analysis)
                        if best_model_path is not None:
                            self._evaluate_best_model(
                                trial,
                                trial_path,
                                best_model_path,
                                validation_set,
                                data_format,
                                skip_save_unprocessed_output,
                                skip_save_predictions,
                                skip_save_eval_stats,
                                gpus,
                                gpu_memory_limit,
                                allow_parallel_threads,
                                backend,
                                debug,
                            )
                        else:
                            logger.warning("Skipping evaluation as no model checkpoints were available")
                    else:
                        logger.warning("Skipping evaluation as no validation set was provided")

            ordered_trials = [TrialResults.from_dict(load_json_values(kwargs)) for kwargs in temp_ordered_trials]
        else:
            logger.warning("No trials reported results; check if time budget lower than epoch latency")
            ordered_trials = []

        return RayTuneResults(ordered_trials=ordered_trials, experiment_analysis=analysis)