def testAsyncHBAllCompletes(self):
    scheduler = AsyncHyperBandScheduler(max_t=10, brackets=10)
    trials = [Trial("PPO") for i in range(10)]
    for t in trials:
        scheduler.on_trial_add(None, t)
    for t in trials:
        self.assertEqual(
            scheduler.on_trial_result(None, t, result(10, -2)),
            TrialScheduler.STOP)

def testAlternateMetrics(self):
    def result2(t, rew):
        return dict(training_iteration=t, neg_mean_loss=rew)

    scheduler = AsyncHyperBandScheduler(
        grace_period=1,
        time_attr='training_iteration',
        reward_attr='neg_mean_loss',
        brackets=1)
    t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
    t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
    scheduler.on_trial_add(None, t1)
    scheduler.on_trial_add(None, t2)
    for i in range(10):
        self.assertEqual(
            scheduler.on_trial_result(None, t1, result2(i, i * 100)),
            TrialScheduler.CONTINUE)
    for i in range(5):
        self.assertEqual(
            scheduler.on_trial_result(None, t2, result2(i, 450)),
            TrialScheduler.CONTINUE)
    scheduler.on_trial_complete(None, t1, result2(10, 1000))
    self.assertEqual(
        scheduler.on_trial_result(None, t2, result2(5, 450)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        scheduler.on_trial_result(None, t2, result2(6, 0)),
        TrialScheduler.CONTINUE)

def testAsyncHBOnComplete(self):
    scheduler = AsyncHyperBandScheduler(max_t=10, brackets=1)
    t1, t2 = self.basicSetup(scheduler)
    t3 = Trial("PPO")
    scheduler.on_trial_add(None, t3)
    scheduler.on_trial_complete(None, t3, result(10, 1000))
    self.assertEqual(
        scheduler.on_trial_result(None, t2, result(101, 0)),
        TrialScheduler.STOP)

def testAsyncHBGracePeriod(self):
    scheduler = AsyncHyperBandScheduler(
        grace_period=2.5, reduction_factor=3, brackets=1)
    t1, t2 = self.basicSetup(scheduler)
    scheduler.on_trial_complete(None, t1, result(10, 1000))
    scheduler.on_trial_complete(None, t2, result(10, 1000))
    t3 = Trial("PPO")
    scheduler.on_trial_add(None, t3)
    self.assertEqual(
        scheduler.on_trial_result(None, t3, result(1, 10)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        scheduler.on_trial_result(None, t3, result(2, 10)),
        TrialScheduler.CONTINUE)
    self.assertEqual(
        scheduler.on_trial_result(None, t3, result(3, 10)),
        TrialScheduler.STOP)

def testAsyncHBUsesPercentile(self):
    scheduler = AsyncHyperBandScheduler(
        grace_period=1, max_t=10, reduction_factor=2, brackets=1)
    t1, t2 = self.basicSetup(scheduler)
    scheduler.on_trial_complete(None, t1, result(10, 1000))
    scheduler.on_trial_complete(None, t2, result(10, 1000))
    t3 = Trial("PPO")
    scheduler.on_trial_add(None, t3)
    self.assertEqual(
        scheduler.on_trial_result(None, t3, result(1, 260)),
        TrialScheduler.STOP)
    self.assertEqual(
        scheduler.on_trial_result(None, t3, result(2, 260)),
        TrialScheduler.STOP)
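# The tests above rely on a `result(...)` helper and a `basicSetup(...)`
# fixture that are not shown in this excerpt. A minimal sketch of what they
# might look like (an assumption, using `episode_reward_mean` as the reward
# key the scheduler reads by default):
def result(t, rew):
    return dict(training_iteration=t, episode_reward_mean=rew)

def basicSetup(self, scheduler):
    # Two trials with different reward trajectories; both are expected to
    # CONTINUE while they report their first results.
    t1 = Trial("PPO")  # mean is 450, max 900, t_max=10
    t2 = Trial("PPO")  # mean is 450, max 450, t_max=5
    scheduler.on_trial_add(None, t1)
    scheduler.on_trial_add(None, t2)
    for i in range(10):
        self.assertEqual(
            scheduler.on_trial_result(None, t1, result(i, i * 100)),
            TrialScheduler.CONTINUE)
    for i in range(5):
        self.assertEqual(
            scheduler.on_trial_result(None, t2, result(i, 450)),
            TrialScheduler.CONTINUE)
    return t1, t2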
def run(model, target, size, result_dir, nmaxepochs, nthreads, cuda, b):
    experiment = transform_experiment()
    # We'll use multiple processes so disable MKL multithreading
    os.environ['MKL_NUM_THREADS'] = str(nthreads)
    torch.set_num_threads(nthreads)
    try:
        with open('../config/redis_address', 'r') as f:
            address = f.read().strip()
        ray.init(redis_address=address)
    except:
        ray.init()
    ahb = AsyncHyperBandScheduler(reward_attr='negative_loss', max_t=nmaxepochs)
    trials = run(experiment, scheduler=ahb, raise_on_failed_trial=False,
                 queue_trials=True, early_stop_all_trials=True)
    trials = [trial for trial in trials if trial.last_result is not None]
    losses = [
        -trial.last_result.get('negative_loss', float('-inf'))
        for trial in trials
    ]
    nparameters = trials[0].last_result['nparameters']
    niterations = trials[0].last_result['training_iteration']
    print(np.array(losses))

    # Polish solutions with L-BFGS
    polish_fn = ray.remote(num_gpus=0.25 if cuda else 0)(polish)
    sorted_trials = sorted(
        trials,
        key=lambda trial: -trial.last_result.get('negative_loss', float('-inf')))
    n_trials = min(N_TRIALS_TO_POLISH, len(trials))
    sorted_trials = sorted_trials[:n_trials]
    polished_losses = ray.get([
        polish_fn.remote(trial) for trial in sorted_trials[:N_TRIALS_TO_POLISH]
    ])
    for i in range(min(N_TRIALS_TO_POLISH, len(trials))):
        sorted_trials[i].last_result['polished_negative_loss'] = -polished_losses[i]
    sorted_polished_trials = sorted(
        sorted_trials,
        key=lambda trial: -trial.last_result['polished_negative_loss'])
    print(np.array([
        -trial.last_result['negative_loss'] for trial in sorted_polished_trials
    ]))
    print(np.array([
        -trial.last_result['polished_negative_loss']
        for trial in sorted_polished_trials
    ]))
    # print(np.sort(losses)[:N_TRIALS_TO_POLISH])
    # print(np.sort(polished_losses))

    checkpoint_path = Path(result_dir) / experiment.name
    checkpoint_path.mkdir(parents=True, exist_ok=True)
    checkpoint_path /= 'trial.pkl'
    with checkpoint_path.open('wb') as f:
        pickle.dump(trials, f)
    ex.add_artifact(str(checkpoint_path))
    if not min(losses + polished_losses) == \
            -sorted_polished_trials[0].last_result['polished_negative_loss']:
        print("BEST LOSS", min(losses + polished_losses), "BEST POLISHED",
              -sorted_polished_trials[0].last_result['polished_negative_loss'])
    return (size, target, model, b, nparameters, niterations,
            -sorted_polished_trials[0].last_result['polished_negative_loss'])
"height": (ValueType.CONTINUOUS, [-10, 10], 1e-2), # for discrete dimensions: (discrete, search_range, has_order) "width": (ValueType.DISCRETE, [-10, 10], False) } config = { "num_samples": 200 if args.smoke_test else 1000, "config": { "iterations": 10, # evaluation times }, "stop": { "timesteps_total": 10 # cumstom stop rules } } zoopt_search = ZOOptSearch( algo="Asracos", # only support ASRacos currently budget=config["num_samples"], dim_dict=dim_dict, max_concurrent=4, metric="mean_loss", mode="min") scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") run(easy_objective, search_alg=zoopt_search, name="zoopt_search", scheduler=scheduler, **config)
import ray.tune as tune

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--smoke-test", action="store_true",
                        help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    # Asynchronous HyperBand early stopping, configured with
    # `episode_reward_max` as the objective and `training_iteration`
    # as the time unit, which is automatically filled by Tune.
    ahb = AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="episode_reward_max",
        grace_period=2000,
        max_t=20000)

    run_experiments(
        {
            "asynchyperband_EC_dqn": {
                "run": 'DQN',
                "env": 'ECglass-v0',
                "stop": {
                    "timesteps_total": 8000000,
                    # "training_iteration": 1 if args.smoke_test else 99999
                },
                "num_samples": 5,
                "resources_per_trial": {
                    "cpu": 1,
                    "gpu": 0.2
        }
    }

    # Optional: Pass the parameter space yourself
    # space = {
    #     "width": (0, 20),
    #     "height": (-100, 100),
    #     "activation": ["relu", "tanh"]
    # }

    previously_run_params = [[10, 0, "relu"], [15, -20, "tanh"]]
    known_rewards = [-189, -1144]

    algo = SkOptSearch(
        # parameter_names=space.keys(),  # If you want to set the space
        # parameter_ranges=space.values(),  # If you want to set the space
        points_to_evaluate=previously_run_params,
        evaluated_rewards=known_rewards)
    algo = ConcurrencyLimiter(algo, max_concurrent=4)

    scheduler = AsyncHyperBandScheduler()

    tune.run(
        easy_objective,
        metric="mean_loss",
        mode="min",
        name="skopt_exp_with_warmstart",
        search_alg=algo,
        scheduler=scheduler,
        **tune_kwargs)
config = {
    "gamma": tune.choice([0.5, 0.7, 1]),
    "step_size": tune.choice([20, 30, 40]),
    "lr": tune.loguniform(0.0001, 0.000001),
    "beta1": 0.9,
    "lambda": tune.loguniform(0.0001, 0.1),
    "batch_size": tune.choice([64, 128]),
    "dropout": tune.uniform(0, 0.5),
    "hidden_size": 128
}

ray.shutdown()
ray.init()

sched = AsyncHyperBandScheduler(time_attr="training_iteration",
                                metric="dev_accuracy",
                                mode="max",
                                max_t=500,
                                grace_period=500)

analysis = tune.run(train,
                    config=config,
                    scheduler=sched,
                    num_samples=1,
                    resources_per_trial={"gpu": 2},
                    local_dir="./results",
                    name="4000_10000",
                    stop={
                        "dev_accuracy": 0.99,
                        "training_iteration": 5 if args.smoke_test else 1000
                    })
def ray_train(cfg, pl_module_cls):
    # We need Munch to hold tune functions. DictConfig can only hold static config.
    cfg = munchconfig_to_tune_munchconfig(dictconfig_to_munch(cfg))
    ray_config = {
        'model': cfg.model,
        'dataset': cfg.dataset,
        'train': cfg.train,
        'seed': cfg.seed,
        'wandb': cfg.wandb,
        'gpu': cfg.runner.gpu_per_trial != 0.0,
    }
    dataset_str = cfg.dataset._target_.split('.')[-1]
    model_str = cfg.model._target_.split('.')[-1]
    args_str = '_'
    # If we're writing to dfs or efs already, no need to sync explicitly.
    # This needs to be a noop function, not just False. If False, ray won't restore failed spot instances.
    sync_to_driver = None if not cfg.runner.nfs else lambda source, target: None
    experiment = Experiment(
        name=f'{dataset_str}_{model_str}',
        run=partial(pl_train_with_tune, pl_module_cls=pl_module_cls),
        local_dir=cfg.runner.result_dir,
        num_samples=cfg.runner.ntrials if not cfg.smoke_test else 1,
        resources_per_trial={'cpu': 1 + cfg.dataset.num_workers,
                             'gpu': cfg.runner.gpu_per_trial},
        # epochs + 1 because calling trainer.test(model) counts as one epoch
        stop={"training_iteration": 1 if cfg.smoke_test else cfg.train.epochs + 1},
        config=ray_config,
        loggers=[WandbLogger],
        keep_checkpoints_num=1,  # Save disk space, just need 1 for recovery
        # checkpoint_at_end=True,
        # checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=-1,
        sync_to_driver=sync_to_driver,  # As of Ray 1.0.0, still need this here
    )
    if cfg.smoke_test or cfg.runner.local:
        ray.init(num_gpus=torch.cuda.device_count())
    else:
        try:
            ray.init(address='auto')
        except:
            try:
                with open(project_root / 'ray_config/redis_address', 'r') as f:
                    address = f.read().strip()
                with open(project_root / 'ray_config/redis_password', 'r') as f:
                    password = f.read().strip()
                ray.init(address=address, _redis_password=password)
            except:
                ray.init(num_gpus=torch.cuda.device_count())
                import warnings
                warnings.warn("Running Ray with just one node")
    if cfg.runner.hyperband:
        scheduler = AsyncHyperBandScheduler(
            metric='mean_accuracy',
            mode='max',
            max_t=cfg.train.epochs + 1,
            grace_period=cfg.runner.grace_period)
    else:
        scheduler = None
    trials = ray.tune.run(
        experiment,
        scheduler=scheduler,
        # sync_config=SyncConfig(sync_to_driver=sync_to_driver),
        raise_on_failed_trial=False,
        queue_trials=True)
    return trials
    resource_per_trial['gpu'] = 2.0
    # sampling_space['data_parallel'] = True

for set_path in args.set:
    setname = Path(set_path).name.split('_')[0]
    sampling_space['set_path'] = get_set_path(set_path)
    logger.info(
        'Search hyper-parameters for %s based on configuration at %s',
        set_path, conf_path)
    exp_name = '%s-%s' % (setname, base_conf.model.experiment_name)

    if args.trial_scheduler == 'hyperband':
        scheduler = AsyncHyperBandScheduler(time_attr='progress',
                                            metric='correct',
                                            mode='max',
                                            max_t=args.epoch,
                                            grace_period=args.epoch / 10,
                                            reduction_factor=2,
                                            brackets=4)
    elif args.trial_scheduler == 'median':
        scheduler = MedianStoppingRule(time_attr='progress',
                                       metric='correct',
                                       grace_period=args.epoch / 10,
                                       min_time_slice=args.epoch / 2)
    elif args.trial_scheduler == 'pbt':
        scheduler = PopulationBasedTraining(
            time_attr='progress',
            metric='correct',
            mode='max',
            perturbation_interval=args.epoch / 4,
            hyperparam_mutations=perturb_space)
    'lossName': tune.choice(['mse']),
    'optimizer': None,
    'optName': tune.choice(['adam']),
    'batch_size': None,
    'batch_size_j': tune.quniform(5, 12, 1),
    'initLR_pre': tune.uniform(np.log(3e-4), np.log(1e-2)),
    'reduceLR_pre': tune.uniform(np.log(0.05), np.log(0.4)),
    'min_deltaLR_pre': tune.uniform(np.log(3e-5), np.log(1e-4)),
    'steps': None,
    'batch_norm_cnn': tune.choice([False, True]),
    'batch_norm_flat': tune.choice([False, True])
}

# ASHA Scheduler
asha = AsyncHyperBandScheduler(time_attr='training_iteration',
                               metric="val_loss",
                               mode="min",
                               grace_period=6)

# HyperOpt
'''
initial_best_config = [{'conv_filter' : [32, 64, 64, 64, 128, 128],
                        'conv_kernel' : 7,
                        'conv_repeat' : 1,
                        'pool_len' : 3,
                        'fc_dims' : [256, 256, 240],
                        'dropFrac' : 0.01,
                        'lossName' : 'mse',
                        'optimizer' : ['nadam', 0.001, 1.1e-07],
                        'batch_size' : 128,
                        'reduceLR_factor' : 0.448298765780917
                        },
def compile(self,
            input_df,
            model_create_func,
            search_space,
            recipe,
            feature_transformers=None,
            # model=None,
            future_seq_len=1,
            validation_df=None,
            mc=False,
            metric="mse",
            metric_mode="min"):
    """
    Do necessary preparations for the engine.

    :param input_df:
    :param model_create_func:
    :param search_space:
    :param recipe:
    :param feature_transformers:
    :param future_seq_len:
    :param validation_df:
    :param mc:
    :param metric:
    :param metric_mode:
    :return:
    """
    # prepare parameters for search engine
    runtime_params = recipe.runtime_params()
    num_samples = runtime_params['num_samples']
    stop = dict(runtime_params)
    search_algorithm_params = recipe.search_algorithm_params()
    search_algorithm = recipe.search_algorithm()
    fixed_params = recipe.fixed_params()
    schedule_algorithm = recipe.scheduler_algorithm()
    del stop['num_samples']
    self.search_space = self._prepare_tune_config(search_space)
    self.stop_criteria = stop
    self.num_samples = num_samples

    if schedule_algorithm == 'AsyncHyperBand':
        from ray.tune.schedulers import AsyncHyperBandScheduler
        self.sched = AsyncHyperBandScheduler(
            time_attr="training_iteration",
            metric="reward_metric",
            mode="max",
            max_t=50,
            grace_period=1,
            reduction_factor=3,
            brackets=3,
        )
    else:
        from ray.tune.schedulers import FIFOScheduler
        self.sched = FIFOScheduler()

    if search_algorithm == 'BayesOpt':
        self.search_algorithm = BayesOptSearch(
            self.search_space,
            metric="reward_metric",
            mode="max",
            utility_kwargs=search_algorithm_params["utility_kwargs"])
    elif search_algorithm == 'SkOpt':
        from skopt import Optimizer
        from ray.tune.suggest.skopt import SkOptSearch
        opt_params = recipe.opt_params()
        optimizer = Optimizer(opt_params)
        self.search_algorithm = SkOptSearch(
            optimizer,
            list(self.search_space.keys()),
            metric="reward_metric",
            mode="max",
        )
    else:
        self.search_algorithm = None
    self.fixed_params = fixed_params

    self.train_func = self._prepare_train_func(
        input_df=input_df,
        model_create_func=model_create_func,
        feature_transformers=feature_transformers,
        future_seq_len=future_seq_len,
        validation_df=validation_df,
        metric=metric,
        metric_mode=metric_mode,
        mc=mc,
        remote_dir=self.remote_dir)
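# The recipe object consumed by compile() is defined elsewhere. A minimal
# sketch of the interface compile() relies on (a hypothetical illustration,
# not the actual recipe classes):
class MinimalRecipe:
    def runtime_params(self):
        # 'num_samples' is popped out by compile(); the rest become stop criteria.
        return {"num_samples": 4, "training_iteration": 20}

    def search_algorithm(self):
        return None  # or 'BayesOpt' / 'SkOpt'

    def search_algorithm_params(self):
        return {"utility_kwargs": {"kind": "ucb", "kappa": 2.5, "xi": 0.0}}

    def fixed_params(self):
        return {}

    def scheduler_algorithm(self):
        return "AsyncHyperBand"

    def opt_params(self):
        # Only used when search_algorithm() returns 'SkOpt'.
        return []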
    return {'mean_squared_error': acc}

def _save(self, checkpoint_dir):
    checkpoint_path = os.path.join(checkpoint_dir, "model.pt")
    # Save the state_dict so that the load_state_dict call in _restore matches.
    torch.save(self.model.state_dict(), checkpoint_path)
    return checkpoint_path

def _restore(self, checkpoint_path):
    self.model.load_state_dict(torch.load(checkpoint_path))

# Scheduler for job scheduling
async_hb_scheduler = AsyncHyperBandScheduler(time_attr='training_iteration',
                                             metric='mean_squared_error',
                                             mode='min',
                                             max_t=256,
                                             grace_period=10,
                                             reduction_factor=3,
                                             brackets=3)

# Actual run call
analysis = tune.run(
    trainModel,
    name=experimentNm,
    config={
        "lr": tune.sample_from(lambda spec: 10**(np.random.uniform(
            hparams["lr"][0], hparams["lr"][1]))),
        "wd": tune.sample_from(lambda spec: 10**(np.random.uniform(
            hparams["wd"][0], hparams["wd"][1]))),
parser = argparse.ArgumentParser()
parser.add_argument(
    "--smoke-test", action="store_true", help="Finish quickly for testing")
parser.add_argument(
    "--ray-address",
    help="Address of Ray cluster for seamless distributed execution.")
args, _ = parser.parse_known_args()
ray.init(address=args.ray_address)

# asynchronous hyperband early stopping, configured with
# `episode_reward_mean` as the objective and `training_iteration`
# as the time unit, which is automatically filled by Tune.
ahb = AsyncHyperBandScheduler(
    time_attr="training_iteration",
    metric="episode_reward_mean",
    mode="max",
    grace_period=5,
    max_t=100)

run(MyTrainableClass,
    name="asynchyperband_test",
    scheduler=ahb,
    stop={"training_iteration": 1 if args.smoke_test else 99999},
    num_samples=20,
    resources_per_trial={
        "cpu": 1,
        "gpu": 0
    },
    config={
        "width": sample_from(lambda spec: 10 + int(90 * random.random())),
        "height": sample_from(lambda spec: int(100 * random.random())),
if __name__ == "__main__": args = parser.parse_args() import ray from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler, FIFOScheduler ray.init() if args.scheduler == "fifo": sched = FIFOScheduler() elif args.scheduler == "asynchyperband": sched = AsyncHyperBandScheduler(time_attr="training_iteration", metric="mean_loss", mode="min", max_t=400, grace_period=60) else: raise NotImplementedError tune.register_trainable( "TRAIN_FN", lambda config, reporter: train_cifar10(args, config, reporter)) tune.run("TRAIN_FN", name=args.expname, verbose=2, scheduler=sched, stop={ "mean_accuracy": 0.98, "training_iteration": 1 if args.smoke_test else args.epochs },
"ndf": 64, "lrD": hp.loguniform('lrD', -8, -1), "lrG": hp.loguniform('lrG', -8, -1), "beta1": hp.uniform('beta1', 0, 1), "beta2": hp.uniform('beta2', 0, 1), "Diters": 5, "noBN": False, "type": hp.choice('type', ["dcgan", "mlp", "resnet"]), } ray.init() algo = HyperOptSearch(space, max_concurrent=4, reward_attr="inception") sched = AsyncHyperBandScheduler(time_attr="training_iteration", reward_attr="inception", max_t=8, grace_period=2) def train(config, reporter): args.update(config) main(args, reporter) tune.register_trainable("main", train) tune.run_experiments( { "var_constraint": { "stop": { "inception": 6,
def trial_scheduler(self):
    metric = self.trial_metric()
    return AsyncHyperBandScheduler(
        metric=metric.name,
        mode=metric.mode,
        max_t=ITERS,
        grace_period=30
    )
opt = parser.parse_args()

if opt.unique:
    train(torch.device("cuda:0"), opt.maxepochs, opt.L2)
else:
    ray.init(num_cpus=10, num_gpus=2)
    data_train = pin_in_object_store(
        LSTMDataset_ByPat(file_path="~/Data/MIMIC/"))
    data_val = pin_in_object_store(
        LSTMDataset_ByPat(csv_file_serie="LSTM_tensor_val.csv",
                          file_path="~/Data/MIMIC/",
                          cov_path="LSTM_covariates_val",
                          tag_path="LSTM_death_tags_val.csv"))
    means_df = pd.Series.from_csv("~/Data/MIMIC/mean_features.csv")
    means_vec = pin_in_object_store(
        torch.tensor(means_df.as_matrix(), dtype=torch.float))

    tune.register_trainable("my_class", train_class)
    hyperband = AsyncHyperBandScheduler(time_attr="training_iteration",
                                        reward_attr="mean_accuracy",
                                        max_t=350,
                                        grace_period=15)

    exp = {
        'run': "my_class",
        'repeat': 30,
        'stop': {"training_iteration": 350},
        'trial_resources': {
            "gpu": 1,
            "cpu": 1
        },
        'config': {
            "L2": lambda spec: 10**(3 * random.random() - 6)
        }
    }
"bn": "True", "meta_epochs": 10000, "K": 2, "cir_inner_loop": 1.0, "n_queries": tune.grid_search([30, 60]), "stop_grad": "False", "anil": "False", "seed": 1, "summary_dir": "CIFAR_FS_OCMAML_BN_" } ray.init() ahb = AsyncHyperBandScheduler(time_attr="training_iteration", metric="mean_loss", mode="min", grace_period=int(1000), max_t=int(20000)) analysis = tune.run(main, config=config[args.hps], resources_per_trial={ "cpu": 3, "gpu": 1 }, scheduler=ahb) df = analysis.dataframe() results_dir_path = './results/' if (not (os.path.exists(results_dir_path))):
        batch_size=batch_size,
        epochs=epochs,
        verbose=0,
        validation_data=(x_test, y_test),
        callbacks=[TuneReportCallback({"mean_accuracy": "accuracy"})])

if __name__ == "__main__":
    import ray
    from ray import tune
    from ray.tune.schedulers import AsyncHyperBandScheduler

    mnist.load_data()  # we do this on the driver because it's not threadsafe

    ray.init(num_cpus=4 if args.smoke_test else None)

    sched = AsyncHyperBandScheduler(time_attr="training_iteration",
                                    max_t=400,
                                    grace_period=20)

    analysis = tune.run(train_mnist,
                        name="exp",
                        scheduler=sched,
                        metric="mean_accuracy",
                        mode="max",
                        stop={
                            "mean_accuracy": 0.99,
                            "training_iteration": 5 if args.smoke_test else 300
                        },
                        num_samples=10,
                        resources_per_trial={
                            "cpu": 8,
                            "gpu": 1
def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    session_dirpath = make_get_filepath(
        os.path.join(
            'reports', backtest_config['exchange'], backtest_config['symbol'],
            f"{n_days}_days_{ts_to_date(time())[:19].replace(':', '')}", ''))
    iters = 10
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')
    num_cpus = 2
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')
    n_particles = 10
    if 'n_particles' in backtest_config:
        n_particles = backtest_config['n_particles']
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']
    current_best_params = []
    if current_best:
        if type(current_best) == list:
            for c in current_best:
                c = clean_start_config(c, config, backtest_config['ranges'])
                current_best_params.append(c)
        else:
            current_best = clean_start_config(current_best, config, backtest_config['ranges'])
            current_best_params.append(current_best)

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    pso = ng.optimizers.ConfiguredPSO(transform='identity',
                                      popsize=n_particles,
                                      omega=omega,
                                      phip=phi1,
                                      phig=phi2)
    algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    analysis = tune.run(tune.with_parameters(wrap_backtest, ticks=ticks),
                        metric='objective',
                        mode='max',
                        name='search',
                        search_alg=algo,
                        scheduler=scheduler,
                        num_samples=iters,
                        config=config,
                        verbose=1,
                        reuse_actors=True,
                        local_dir=session_dirpath,
                        progress_reporter=LogReporter(
                            metric_columns=[
                                'daily_gain', 'closest_liquidation',
                                'max_hrs_no_fills',
                                'max_hrs_no_fills_same_side', 'objective'
                            ],
                            parameter_columns=[
                                k for k in backtest_config['ranges']
                                if type(config[k]) == ray.tune.sample.Float
                                or type(config[k]) == ray.tune.sample.Integer
                            ]))
    ray.shutdown()
    return analysis
"type": "range", "bounds": [0.0, 1.0], }, { "name": "x4", "type": "range", "bounds": [0.0, 1.0], }, { "name": "x5", "type": "range", "bounds": [0.0, 1.0], }, { "name": "x6", "type": "range", "bounds": [0.0, 1.0], }, ] client = AxClient(enforce_sequential_optimization=False) client.create_experiment( parameters=parameters, objective_name="hartmann6", minimize=True, # Optional, defaults to False. parameter_constraints=["x1 + x2 <= 2.0"], # Optional. outcome_constraints=["l2norm <= 1.25"], # Optional. ) algo = AxSearch(client, max_concurrent=4) scheduler = AsyncHyperBandScheduler(metric="hartmann6", mode="max") run(easy_objective, name="ax", search_alg=algo, **config)
def main(args):
    utils.init_random()

    exp_configs, tune_configs = utils.get_tune_configs(args.logdir)

    hparams = {}
    parameters = []
    for param_subset, params in tune_configs.items():
        hparams[param_subset] = []
        for param, options in params.items():
            parameters.append({'name': param, **options})
            hparams[param_subset].append(param)

    exp_configs['hparams'] = hparams
    exp_configs['data_params']['subset'] = args.subset
    exp_configs['data_params']['workers'] = args.ds_workers
    max_epochs = 2 if args.smoke else args.max_epochs
    num_samples = 2 if args.smoke else args.num_samples
    exp_configs.update({'num_gpus': 1})

    # ray.init()
    ray.init(memory=2000 * 1024 * 1024,
             object_store_memory=200 * 1024 * 1024,
             driver_object_store_memory=100 * 1024 * 1024)

    scheduler = AsyncHyperBandScheduler(time_attr="training_iteration",
                                        metric="val_accuracy",
                                        mode="max",
                                        grace_period=5,
                                        max_t=max(max_epochs, 5))
    client = AxClient(enforce_sequential_optimization=True)
    client.create_experiment(parameters=parameters, objective_name='val_accuracy')
    search_alg = AxSearch(client, max_concurrent=1, mode='max')
    # search_alg = ConcurrencyLimiter(search_alg, max_concurrent=2)

    reporter = CLIReporter()
    reporter.add_metric_column("val_accuracy")
    reporter.add_metric_column("train_loss")

    trainable = TorchTrainer.as_trainable(
        model_creator=utils.model_creator,
        data_creator=utils.data_creator,
        optimizer_creator=utils.optimizer_creator,
        loss_creator=utils.loss_creator,
        scheduler_creator=utils.scheduler_creator,
        scheduler_step_freq="epoch",
        use_gpu=True,
        config={BATCH_SIZE: exp_configs['batch_size']},
        num_workers=args.workers)

    analysis = tune.run(trainable,
                        num_samples=num_samples,
                        config=exp_configs,
                        trial_name_creator=utils.trial_str_creator,
                        progress_reporter=reporter,
                        scheduler=scheduler,
                        search_alg=search_alg,
                        stop={"training_iteration": max_epochs},
                        local_dir=args.logdir,
                        checkpoint_freq=10,
                        checkpoint_at_end=True,
                        keep_checkpoints_num=3,
                        resume=args.resume,
                        checkpoint_score_attr='val_accuracy',
                        max_failures=2,
                        verbose=1)
pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    reward_attr="mean_accuracy",
    hyperparam_mutations={
        'block': [
            tune.sample_from(lambda spec: np.random.randint(2, high=25)),
            tune.sample_from(lambda spec: np.random.randint(2, high=25)),
            tune.sample_from(lambda spec: np.random.randint(2, high=25)),
            tune.sample_from(lambda spec: np.random.randint(2, high=25))
        ]
    })

trials = tune.run_experiments(configuration, scheduler=pbt)
print(trials)
exit(1)

sched = AsyncHyperBandScheduler(
    time_attr="timesteps_total",
    reward_attr="mean_accuracy",
    max_t=400,
    grace_period=20)

cifar_spec = {
    'stop': {'mean_accuracy': 0.99},
    'config': {
        'block': tune.grid_search([3, 6, 12, 24])
    }
}

# tune.run('train_cifar_tune', name="mytune", scheduler=sched, **cifar_spec)
tune.run('train_cifar_tune', name="mytune", scheduler=sched)

'''tune.run('train_cifar_tune', name="mytune", scheduler=sched, **{
    'stop': {'mean_accuracy': 0.99},
    'config': {
        "lr": tune.sample_from(
if __name__ == "__main__": import argparse from nevergrad.optimization import optimizerlib parser = argparse.ArgumentParser() parser.add_argument("--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() ray.init() config = { "num_samples": 10 if args.smoke_test else 50, "config": { "iterations": 100, }, "stop": { "timesteps_total": 100 } } optimizer = optimizerlib.OnePlusOne(dimension=2) algo = NevergradSearch(optimizer, ["height", "width"], max_concurrent=4, reward_attr="neg_mean_loss") scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss") run(easy_objective, name="nevergrad", search_alg=algo, scheduler=scheduler, **config)
if __name__ == "__main__": parser = argparse.ArgumentParser(description="PyTorch MNIST Example") parser.add_argument("--cuda", action="store_true", default=False, help="Enables GPU training") parser.add_argument("--smoke-test", action="store_true", help="Finish quickly for testing") parser.add_argument( "--ray-address", help="Address of Ray cluster for seamless distributed execution.") args = parser.parse_args() if args.ray_address: ray.init(address=args.ray_address) sched = AsyncHyperBandScheduler(time_attr="training_iteration", metric="mean_accuracy") analysis = tune.run( train_mnist, name="exp", scheduler=sched, stop={ "mean_accuracy": 0.98, "training_iteration": 5 if args.smoke_test else 100 }, resources_per_trial={ "cpu": 2, "gpu": int(args.cuda) }, num_samples=1 if args.smoke_test else 50, config={ "lr": tune.sample_from(lambda spec: 10**(-10 * np.random.rand())),
def hparams(algorithm, scheduler, num_samples, tensorboard, bare):
    from glob import glob

    import tensorflow.summary
    from tensorflow import random as tfrandom, int64 as tfint64
    from ray import init as init_ray, shutdown as shutdown_ray
    from ray import tune
    from wandb.ray import WandbLogger
    from wandb import sweep as wandbsweep
    from wandb.apis import CommError as wandbCommError

    # fewer summaries are logged if MLENCRYPT_TB is TRUE (for efficiency)
    # TODO: use tf.summary.record_if?
    environ["MLENCRYPT_TB"] = str(tensorboard).upper()
    environ["MLENCRYPT_BARE"] = str(bare).upper()
    if getenv('MLENCRYPT_TB', 'FALSE') == 'TRUE' and \
            getenv('MLENCRYPT_BARE', 'FALSE') == 'TRUE':
        raise ValueError('TensorBoard logging cannot be enabled in bare mode.')

    logdir = f'logs/hparams/{datetime.now()}'

    # "These results show that K = 3 is the optimal choice for the
    # cryptographic application of neural synchronization. K = 1 and K = 2 are
    # too insecure in regard to the geometric attack. And for K > 3 the effort
    # of A and B grows exponentially with increasing L, while the simple attack
    # is quite successful in the limit K -> infinity. Consequently, one should
    # only use Tree Parity Machines with three hidden units for the neural
    # key-exchange protocol." (Ruttor, 2006)
    # https://arxiv.org/pdf/0711.2411.pdf#page=59
    update_rules = [
        'random-same',
        # 'random-different-A-B-E', 'random-different-A-B',
        'hebbian', 'anti_hebbian', 'random_walk'
    ]
    K_bounds = {'min': 4, 'max': 8}
    N_bounds = {'min': 4, 'max': 8}
    L_bounds = {'min': 4, 'max': 8}

    # TODO: don't use *_bounds.values() since .values doesn't preserve order
    def get_session_num(logdir):
        current_runs = glob(join(logdir, "run-*"))
        if current_runs:
            last_run_path = current_runs[-1]
            last_run_session_num = int(last_run_path.split('-')[-1])
            return last_run_session_num + 1
        else:  # there are no runs yet, start at 0
            return 0

    def trainable(config, reporter):
        """
        Args:
            config (dict): Parameters provided from the search algorithm
                or variant generation.
""" if not isinstance(config['update_rule'], str): update_rule = update_rules[int(config['update_rule'])] else: update_rule = config['update_rule'] K, N, L = int(config['K']), int(config['N']), int(config['L']) run_name = f"run-{get_session_num(logdir)}" run_logdir = join(logdir, run_name) # for each attack, the TPMs should start with the same weights initial_weights_tensors = get_initial_weights(K, N, L) training_steps_ls = {} eve_scores_ls = {} losses_ls = {} # for each attack, the TPMs should use the same inputs seed = tfrandom.uniform([], minval=0, maxval=tfint64.max, dtype=tfint64).numpy() for attack in ['none', 'geometric']: initial_weights = { tpm: weights_tensor_to_variable(weights, tpm) for tpm, weights in initial_weights_tensors.items() } tfrandom.set_seed(seed) if tensorboard: attack_logdir = join(run_logdir, attack) attack_writer = tensorflow.summary.create_file_writer( attack_logdir) with attack_writer.as_default(): training_steps, sync_scores, loss = run( update_rule, K, N, L, attack, initial_weights) else: training_steps, sync_scores, loss = run( update_rule, K, N, L, attack, initial_weights) training_steps_ls[attack] = training_steps eve_scores_ls[attack] = sync_scores losses_ls[attack] = loss avg_training_steps = tensorflow.math.reduce_mean( list(training_steps_ls.values())) avg_eve_score = tensorflow.math.reduce_mean( list(eve_scores_ls.values())) mean_loss = tensorflow.math.reduce_mean(list(losses_ls.values())) reporter( avg_training_steps=avg_training_steps.numpy(), avg_eve_score=avg_eve_score.numpy(), mean_loss=mean_loss.numpy(), done=True, ) if algorithm == 'hyperopt': from hyperopt import hp as hyperopt from hyperopt.pyll.base import scope from ray.tune.suggest.hyperopt import HyperOptSearch space = { 'update_rule': hyperopt.choice( 'update_rule', update_rules, ), 'K': scope.int(hyperopt.quniform('K', *K_bounds.values(), q=1)), 'N': scope.int(hyperopt.quniform('N', *N_bounds.values(), q=1)), 'L': scope.int(hyperopt.quniform('L', *L_bounds.values(), q=1)), } algo = HyperOptSearch( space, metric='mean_loss', mode='min', points_to_evaluate=[ { 'update_rule': 0, 'K': 3, 'N': 16, 'L': 8 }, { 'update_rule': 0, 'K': 8, 'N': 16, 'L': 8 }, { 'update_rule': 0, 'K': 8, 'N': 16, 'L': 128 }, ], ) elif algorithm == 'bayesopt': from ray.tune.suggest.bayesopt import BayesOptSearch space = { 'update_rule': (0, len(update_rules)), 'K': tuple(K_bounds.values()), 'N': tuple(N_bounds.values()), 'L': tuple(L_bounds.values()), } algo = BayesOptSearch( space, metric="mean_loss", mode="min", # TODO: what is utility_kwargs for and why is it needed? 
            utility_kwargs={
                "kind": "ucb",
                "kappa": 2.5,
                "xi": 0.0
            })
    elif algorithm == 'nevergrad':
        from ray.tune.suggest.nevergrad import NevergradSearch
        from nevergrad import optimizers
        from nevergrad import p as ngp
        algo = NevergradSearch(
            optimizers.TwoPointsDE(
                ngp.Instrumentation(
                    update_rule=ngp.Choice(update_rules),
                    K=ngp.Scalar(lower=K_bounds['min'],
                                 upper=K_bounds['max']).set_integer_casting(),
                    N=ngp.Scalar(lower=N_bounds['min'],
                                 upper=N_bounds['max']).set_integer_casting(),
                    L=ngp.Scalar(lower=L_bounds['min'],
                                 upper=L_bounds['max']).set_integer_casting(),
                )),
            None,  # since the optimizer is already instrumented with kwargs
            metric="mean_loss",
            mode="min")
    elif algorithm == 'skopt':
        from skopt import Optimizer
        from ray.tune.suggest.skopt import SkOptSearch
        optimizer = Optimizer([
            update_rules,
            tuple(K_bounds.values()),
            tuple(N_bounds.values()),
            tuple(L_bounds.values())
        ])
        algo = SkOptSearch(
            optimizer,
            ["update_rule", "K", "N", "L"],
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'dragonfly':
        # TODO: doesn't work
        from ray.tune.suggest.dragonfly import DragonflySearch
        from dragonfly.exd.experiment_caller import EuclideanFunctionCaller
        from dragonfly.opt.gp_bandit import EuclideanGPBandit
        # from dragonfly.exd.experiment_caller import CPFunctionCaller
        # from dragonfly.opt.gp_bandit import CPGPBandit
        from dragonfly import load_config
        domain_config = load_config({
            "domain": [
                {
                    "name": "update_rule",
                    "type": "discrete",
                    "dim": 1,
                    "items": update_rules
                },
                {
                    "name": "K",
                    "type": "int",
                    "min": K_bounds['min'],
                    "max": K_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "N",
                    "type": "int",
                    "min": N_bounds['min'],
                    "max": N_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "L",
                    "type": "int",
                    "min": L_bounds['min'],
                    "max": L_bounds['max'],
                    # "dim": 1
                }
            ]
        })
        func_caller = EuclideanFunctionCaller(
            None, domain_config.domain.list_of_domains[0])
        optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)
        algo = DragonflySearch(
            optimizer,
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'bohb':
        from ConfigSpace import ConfigurationSpace
        from ConfigSpace import hyperparameters as CSH
        from ray.tune.suggest.bohb import TuneBOHB
        config_space = ConfigurationSpace()
        config_space.add_hyperparameter(
            CSH.CategoricalHyperparameter("update_rule", choices=update_rules))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='K',
                                             lower=K_bounds['min'],
                                             upper=K_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='N',
                                             lower=N_bounds['min'],
                                             upper=N_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='L',
                                             lower=L_bounds['min'],
                                             upper=L_bounds['max']))
        algo = TuneBOHB(config_space, metric="mean_loss", mode="min")
    elif algorithm == 'zoopt':
        from ray.tune.suggest.zoopt import ZOOptSearch
        from zoopt import ValueType
        space = {
            "update_rule": (ValueType.DISCRETE,
                            range(0, len(update_rules)), False),
            "K": (ValueType.DISCRETE,
                  range(K_bounds['min'], K_bounds['max'] + 1), True),
            "N": (ValueType.DISCRETE,
                  range(N_bounds['min'], N_bounds['max'] + 1), True),
            "L": (ValueType.DISCRETE,
                  range(L_bounds['min'], L_bounds['max'] + 1), True),
        }
        # TODO: change budget to a large value
        algo = ZOOptSearch(budget=10,
                           dim_dict=space,
                           metric="mean_loss",
                           mode="min")

    # TODO: use more appropriate arguments for schedulers:
    # https://docs.ray.io/en/master/tune/api_docs/schedulers.html
    if scheduler == 'fifo':
        sched = None  # Tune defaults to FIFO
    elif scheduler == 'pbt':
        from ray.tune.schedulers import PopulationBasedTraining
        from random import randint
        sched = PopulationBasedTraining(
            metric="mean_loss",
            mode="min",
            hyperparam_mutations={
                "update_rule": update_rules,
                "K": lambda: randint(K_bounds['min'], K_bounds['max']),
                "N": lambda: randint(N_bounds['min'], N_bounds['max']),
                "L": lambda: randint(L_bounds['min'], L_bounds['max']),
            })
    elif scheduler == 'ahb' or scheduler == 'asha':
        # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler
        from ray.tune.schedulers import AsyncHyperBandScheduler
        sched = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    elif scheduler == 'hb':
        from ray.tune.schedulers import HyperBandScheduler
        sched = HyperBandScheduler(metric="mean_loss", mode="min")
    elif algorithm == 'bohb' or scheduler == 'bohb':
        from ray.tune.schedulers import HyperBandForBOHB
        sched = HyperBandForBOHB(metric="mean_loss", mode="min")
    elif scheduler == 'msr':
        from ray.tune.schedulers import MedianStoppingRule
        sched = MedianStoppingRule(metric="mean_loss", mode="min")

    init_ray(
        address=getenv("ip_head"),
        redis_password=getenv('redis_password'),
    )
    analysis = tune.run(
        trainable,
        name='mlencrypt_research',
        config={
            "monitor": True,
            "env_config": {
                "wandb": {
                    "project": "mlencrypt-research",
                    "sync_tensorboard": True,
                },
            },
        },
        # resources_per_trial={"cpu": 1, "gpu": 3},
        local_dir='./ray_results',
        export_formats=['csv'],  # TODO: add other formats?
        num_samples=num_samples,
        loggers=[
            tune.logger.JsonLogger, tune.logger.CSVLogger,
            tune.logger.TBXLogger, WandbLogger
        ],
        search_alg=algo,
        scheduler=sched,
        queue_trials=True,
    )
    try:
        wandbsweep(analysis)
    except wandbCommError:
        # see https://docs.wandb.com/sweeps/ray-tune#feature-compatibility
        pass
    best_config = analysis.get_best_config(metric='mean_loss', mode='min')
    print(f"Best config: {best_config}")
    shutdown_ray()
def backtest_tune(ticks: np.ndarray, backtest_config: dict, current_best: Union[dict, list] = None):
    config = create_config(backtest_config)
    n_days = round_((ticks[-1][2] - ticks[0][2]) / (1000 * 60 * 60 * 24), 0.1)
    backtest_config['optimize_dirpath'] = os.path.join(
        backtest_config['optimize_dirpath'],
        ts_to_date(time())[:19].replace(':', ''), '')
    if 'iters' in backtest_config:
        iters = backtest_config['iters']
    else:
        print('Parameter iters should be defined in the configuration. Defaulting to 10.')
        iters = 10
    if 'num_cpus' in backtest_config:
        num_cpus = backtest_config['num_cpus']
    else:
        print('Parameter num_cpus should be defined in the configuration. Defaulting to 2.')
        num_cpus = 2
    n_particles = backtest_config['n_particles'] if 'n_particles' in backtest_config else 10
    phi1 = 1.4962
    phi2 = 1.4962
    omega = 0.7298
    if 'options' in backtest_config:
        phi1 = backtest_config['options']['c1']
        phi2 = backtest_config['options']['c2']
        omega = backtest_config['options']['w']
    current_best_params = []
    if current_best:
        if type(current_best) == list:
            for c in current_best:
                c = clean_start_config(c, config, backtest_config['ranges'])
                if c not in current_best_params:
                    current_best_params.append(c)
        else:
            current_best = clean_start_config(current_best, config, backtest_config['ranges'])
            current_best_params.append(current_best)

    ray.init(num_cpus=num_cpus, logging_level=logging.FATAL, log_to_driver=False)
    pso = ng.optimizers.ConfiguredPSO(transform='identity',
                                      popsize=n_particles,
                                      omega=omega,
                                      phip=phi1,
                                      phig=phi2)
    algo = NevergradSearch(optimizer=pso, points_to_evaluate=current_best_params)
    algo = ConcurrencyLimiter(algo, max_concurrent=num_cpus)
    scheduler = AsyncHyperBandScheduler()

    if 'wfo' in config and config['wfo']:
        print('\n\nwalk forward optimization\n\n')
        wfo = WFO(ticks, backtest_config, P_train=0.5).set_train_N(4)
        backtest_wrap = lambda config: tune_report(wfo.backtest(config))
    else:
        print('\n\nsimple sliding window optimization\n\n')
        backtest_wrap = tune.with_parameters(simple_sliding_window_wrap, ticks=ticks)
    analysis = tune.run(backtest_wrap,
                        metric='objective',
                        mode='max',
                        name='search',
                        search_alg=algo,
                        scheduler=scheduler,
                        num_samples=iters,
                        config=config,
                        verbose=1,
                        reuse_actors=True,
                        local_dir=backtest_config['optimize_dirpath'],
                        progress_reporter=LogReporter(
                            metric_columns=[
                                'daily_gain', 'closest_liquidation',
                                'max_hrs_no_fills',
                                'max_hrs_no_fills_same_side', 'objective'
                            ],
                            parameter_columns=[
                                k for k in backtest_config['ranges']
                                if type(config[k]) == ray.tune.sample.Float
                                or type(config[k]) == ray.tune.sample.Integer
                            ]),
                        raise_on_failed_trial=False)
    ray.shutdown()
    return analysis
def search(runner, conf_dir_file):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"

    def train_once(runner):
        for epoch in range(runner.train_epochs):
            losses = AverageMeter()
            top1 = AverageMeter()
            top5 = AverageMeter()
            runner.model.train()
            for i, data in enumerate(runner.tr_loader):
                images, labels = data
                if conf.get()['cuda']['avail']:
                    images, labels = images.to(runner.device), labels.to(runner.device)
                    runner.model = runner.model.to(runner.device)
                runner.optimizer.zero_grad()
                outputs, loss = runner.regularizer(images, labels)
                loss.backward()
                runner.optimizer.step()
                ttop1, ttop5 = runner.accuracy(outputs, labels, (1, 5))
                losses.update(loss.item(), images.size(0))
                top1.update(ttop1.item(), images.size(0))
                top5.update(ttop5.item(), images.size(0))
            print('[{:d}/{:d}] <<<TRAIN>>> lr({:.10f}) loss({:.4f}) top1({:.3f}) top5({:.3f})'
                  .format(epoch + 1, runner.train_epochs,
                          runner.optimizer.param_groups[0]['lr'],
                          losses.avg, top1.avg, top5.avg))
            runner.scheduler.step()

    def train(config):
        conf_dir_file = config['conf_dir_file']
        my_conf = Config(filename=conf_dir_file)
        for key, value in config.items():
            if key != 'conf_dir_file':
                my_conf.get()['model'][key] = value
        my_conf.get()['model']['name'] = 'rexnetv1_search'
        my_conf.get()['model']['input_ch'] = int(my_conf.get()['model']['input_ch'])
        my_conf.get()['model']['final_ch'] = int(my_conf.get()['model']['final_ch'])
        my_conf.get()['model']['use_se'] = round(my_conf.get()['model']['use_se'])
        my_conf.get()['model']['se_ratio'] = int(my_conf.get()['model']['se_ratio'])
        # my_conf.get()['model']['lr'] = config['lr']
        # my_conf.get()['optimizer']['name'] = config['optimizer']
        # my_conf.get()['scheduler']['name'] = config['scheduler']
        # my_conf.get()['model']['config'] = np.array(config['network_block_cfg']).reshape(-1, 4).tolist()
        my_runner = Runner(my_conf)
        train_once(my_runner)
        my_mean_accuracy = my_runner.best_acc_top1
        tune.report(mean_accuracy=my_mean_accuracy)

    ray.init(configure_logging=False)

    search_config = {
        # "conf_dir_file": hp.choice('conf_dir_file', [conf_dir_file]),
        "input_ch": (16, 32),
        "final_ch": (180, 320),
        "width_mult": (1.0, 3.0),
        "depth_mult": (1.0, 3.0),
        "use_se": (False, True),
        "se_ratio": (6, 24),
        "dropout_ratio": (0.1, 0.5),
        # "bn_momentum": (0.1, 0.9),
        "lr": (0.001, 0.125)
        # "optimizer": tune.choice(['SGD','SGDP','Adam','AdamP']),
        # "scheduler": tune.choice(['CosineAnnealingLR','MultiStepLR']),
        # "network_block_cfg": tune.grid_search([
        #     [2.5, 20, 2, 1,
        #      2.5, 36, 1, 2,
        #      2.5, 36, 1, 1,
        #      2.5, 56, 3, 1,
        #      2.5, 80, 1, 2,
        #      2.5, 80, 4, 1,
        #      2.5, 88, 1, 2,
        #      2.5, 96, 2, 1,
        #      2.5, 114, 1, 1],
        #     [3, 16, 2, 1,
        #      3, 32, 1, 2,
        #      3, 32, 1, 1,
        #      3, 48, 3, 1,
        #      3, 72, 1, 2,
        #      3, 72, 4, 1,
        #      3, 80, 1, 2,
        #      3, 88, 2, 1,
        #      3, 106, 1, 1]
        # ])
    }
    bo_config = {
        "num_samples": 100,
        "config": {
            'conf_dir_file': conf_dir_file,
        }
    }
    algo = BayesOptSearch(search_config,
                          max_concurrent=1,
                          metric="mean_accuracy",
                          mode="max",
                          utility_kwargs={
                              "kind": "ucb",
                              "kappa": 2.5,
                              "xi": 0.0
                          })
    scheduler = AsyncHyperBandScheduler(metric='mean_accuracy', mode='max')
    analysis = tune.run(train,
                        scheduler=scheduler,
                        search_alg=algo,
                        resources_per_trial={'gpu': 1},
                        stop={"train_epoch": 3},
                        **bo_config)

    print("Best config: ", analysis.get_best_config(metric="mean_accuracy"))
    print('architecture_search() Done.')
import ray
from ray import tune
from ray.tune.schedulers import AsyncHyperBandScheduler
from shutil import copyfile

ray.init()

exp_name = args.outdir
outdir = os.path.join(os.environ['HOME'], 'ray_results', exp_name)
if not os.path.exists(outdir):
    os.mkdir(outdir)

# Copy this source file to the output directory for record keeping
copyfile(__file__, os.path.join(outdir, 'search.py'))

sched = AsyncHyperBandScheduler(time_attr="training_iteration",
                                reward_attr="neg_mean_loss",
                                max_t=200,
                                grace_period=120)

# Select which networks to run
if args.type is not None:
    if args.type == 'nets':
        type_ = list(nets.keys())
    elif args.type == 'nets1':
        type_ = list(nets1.keys())
    elif args.type == 'nets2':
        type_ = list(nets2.keys())
    else:
        type_ = args.type
else:
    type_ = list(nets.keys()) + [
        'ref',
)
parser.add_argument(
    "--server-address",
    type=str,
    default=None,
    required=False,
    help="The address of server to connect to if using Ray Client.",
)
args, _ = parser.parse_known_args()

if args.server_address is not None:
    ray.init(f"ray://{args.server_address}")
else:
    ray.init(address=args.ray_address)

# AsyncHyperBand enables aggressive early stopping of bad trials.
scheduler = AsyncHyperBandScheduler(grace_period=5, max_t=100)

# 'training_iteration' is incremented every time `trainable.step` is called
stopping_criteria = {"training_iteration": 1 if args.smoke_test else 9999}

analysis = tune.run(
    easy_objective,
    name="asynchyperband_test",
    metric="mean_loss",
    mode="min",
    scheduler=scheduler,
    stop=stopping_criteria,
    num_samples=20,
    verbose=1,
    resources_per_trial={"cpu": 1, "gpu": 0},
    config={  # Hyperparameter space
parser.add_argument('--smoke-test', action='store_true',
                    help='Finish quickly for testing')
args, _ = parser.parse_known_args()

mnist_spec = {
    'run': train,
    'num_samples': 10,
    'stop': {
        'mean_accuracy': 0.99,
        'timesteps_total': 600,
    },
    'config': {
        'activation': grid_search(['relu', 'elu', 'tanh']),
    },
}

if args.smoke_test:
    mnist_spec['stop']['training_iteration'] = 2
    mnist_spec['num_samples'] = 1

ray.init()

from ray.tune.schedulers import AsyncHyperBandScheduler
run_experiments(
    {'tune_mnist_test': mnist_spec},
    scheduler=AsyncHyperBandScheduler(
        time_attr="timesteps_total",
        reward_attr="mean_accuracy",
        max_t=600,
    ))
def distributed_bo(loss: Callable,
                   space: Dict,
                   metric: str,
                   mode: str = "min",
                   patience: int = 5,
                   name: str = "gaussian_process",
                   random_search_steps: int = 5,
                   bo_steps: int = 500,
                   resources_per_trial: Dict = None,
                   config: Dict = None):
    """Executes a distributed Bayesian optimization on a Ray cluster.

    Usage examples
    --------------------
    See the usage sketch following this function definition.

    Parameters
    --------------------
    loss: Callable,
        Loss function to be computed.
    space: Dict,
        The space of parameters to explore.
    metric: str,
        The metric passed by the loss function to consider.
    mode: str = "min",
        The optimization direction.
    patience: int = 5,
        Early stopping patience.
    name: str = "gaussian_process",
        Name of the distributed BO experiment.
    random_search_steps: int = 5,
        Number of steps in the initial random search.
    bo_steps: int = 500,
        Number of steps to run in the Bayesian optimization.
    resources_per_trial: Dict = None,
        Resources to use for each node, by default {"cpu": 1, "gpu": 0}.
    config: Dict = None,
        Configuration to pass to the function.
    """
    if config is None:
        config = {}
    if resources_per_trial is None:
        resources_per_trial = {"cpu": 1, "gpu": 0}

    # Scheduler for the experiments
    hyperband = AsyncHyperBandScheduler(time_attr="training_iteration",
                                        metric=metric,
                                        mode=mode)

    # Following Bayesian optimization
    gp = BayesOptSearch(space,
                        metric=metric,
                        mode=mode,
                        random_search_steps=random_search_steps)

    # Execution of the BO.
    return tune.run(
        loss,
        name=name,
        stop=EarlyStopping(metric, mode=mode, patience=patience),
        local_dir=name,
        scheduler=hyperband,
        search_alg=gp,
        config=config,
        num_samples=bo_steps + random_search_steps,  # Number of iterations
        resources_per_trial=resources_per_trial,
        raise_on_failed_trial=False,
        verbose=0)
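# A hypothetical usage sketch for `distributed_bo` (the toy objective and the
# parameter names "x"/"y" below are illustrative assumptions, not part of the
# code above):
if __name__ == "__main__":
    from ray import tune

    def toy_loss(config):
        # Report the metric that the scheduler and early stopping watch.
        x, y = config["x"], config["y"]
        tune.report(mse=(x - 1.0) ** 2 + (y + 2.0) ** 2)

    analysis = distributed_bo(
        loss=toy_loss,
        space={"x": (-5.0, 5.0), "y": (-5.0, 5.0)},
        metric="mse",
        mode="min",
        random_search_steps=5,
        bo_steps=20)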