def test_convergence_gaussian_process(self):
    """Run a BayesOptSearch-driven ``tune.run`` on ``loss`` and pin the trial count.

    Seeds NumPy, starts a local-mode Ray instance, runs the search with at most
    one concurrent trial, and asserts that exactly 41 trials were created out
    of the 100 requested samples.
    """
    np.random.seed(0)
    ray.init(local_mode=True, num_cpus=1, num_gpus=1)
    # Shut Ray down even when the run or the assertion fails, so a failure
    # here does not leave an initialized Ray instance behind.
    try:
        # This is the space of parameters to explore
        space = {"x": (0, 20)}

        resources_per_trial = {"cpu": 1, "gpu": 0}

        # Following bayesian optimization
        gp = BayesOptSearch(
            space, metric="loss", mode="min", random_search_steps=10)
        gp.repeat_float_precision = 5
        gp = ConcurrencyLimiter(gp, 1)

        # Execution of the BO.
        analysis = tune.run(
            loss,
            # stop=EarlyStopping("loss", mode="min", patience=5),
            search_alg=gp,
            config={},
            num_samples=100,  # Number of iterations
            resources_per_trial=resources_per_trial,
            raise_on_failed_trial=False,
            fail_fast=True,
            verbose=1)
        assert len(analysis.trials) == 41
    finally:
        ray.shutdown()
def test_convergence_gaussian_process(self):
    """Run a BayesOptSearch-driven ``tune.run`` on ``loss`` and check the result.

    Seeds NumPy, starts a local-mode Ray instance, and searches ``x`` over
    ``tune.uniform(0, 20)`` with at most one concurrent trial. Asserts that the
    number of trials is one of the accepted counts and that the best ``x`` is
    within 1e-5 of 0.
    """
    np.random.seed(0)
    ray.init(local_mode=True, num_cpus=1, num_gpus=1)
    # This test starts Ray itself, so it also shuts it down -- including when
    # the run or an assertion fails.
    try:
        # This is the space of parameters to explore
        space = {"x": tune.uniform(0, 20)}

        resources_per_trial = {"cpu": 1, "gpu": 0}

        # Following bayesian optimization
        gp = BayesOptSearch(random_search_steps=10)
        gp.repeat_float_precision = 5
        gp = ConcurrencyLimiter(gp, 1)

        # Execution of the BO.
        analysis = tune.run(
            loss,
            metric="loss",
            mode="min",
            # stop=EarlyStopping("loss", mode="min", patience=5),
            search_alg=gp,
            config=space,
            num_samples=100,  # Number of iterations
            resources_per_trial=resources_per_trial,
            raise_on_failed_trial=False,
            fail_fast=True,
            verbose=1)
        assert len(analysis.trials) in {13, 40, 43}  # it is 43 on the cluster?
        assert math.isclose(analysis.best_config["x"], 0, abs_tol=1e-5)
    finally:
        ray.shutdown()
def testBayesOpt(self):
    """Save one BayesOptSearch and restore into a second, freshly built one.

    Both searchers are constructed with ``self.config`` as the space,
    ``self.metric_name`` as the metric and mode ``"max"``.
    """
    from ray.tune.suggest.bayesopt import BayesOptSearch

    def build_searcher():
        # Same constructor arguments for the saved and the restored searcher.
        return BayesOptSearch(
            space=self.config, metric=self.metric_name, mode="max")

    self._save(build_searcher())
    self._restore(build_searcher())
def testConvergenceBayesOpt(self):
    """Check the result of ``self._testConvergence`` for a BayesOptSearch.

    The searcher uses 10 random search steps, a repeat float precision of 5
    and is limited to one concurrent trial. Asserts that fewer than 50 trials
    were created and that the best ``x`` is within 1e-5 of 0.
    """
    from ray.tune.suggest.bayesopt import BayesOptSearch

    np.random.seed(0)

    # Following bayesian optimization
    bayes = BayesOptSearch(random_search_steps=10)
    bayes.repeat_float_precision = 5
    limited = ConcurrencyLimiter(bayes, 1)

    analysis = self._testConvergence(limited, patience=100)

    trial_count = len(analysis.trials)
    assert trial_count < 50

    best_x = analysis.best_config["x"]
    assert math.isclose(best_x, 0, abs_tol=1e-5)
def run_async_hyperband(smoke_test=False,
                        expname="test",
                        obs_noise_std=0,
                        action_noise_std=0,
                        params=None):
    """Run the ``asynchyperband_test`` experiment with BayesOpt + AsyncHyperBand.

    Args:
        smoke_test: When true, use the small settings (3 samples, ``max_t`` of
            5, one CPU in total); otherwise 30 samples, ``max_t`` of
            ``1e6 // 40`` and 12 CPUs in total.
        expname: Not read by this function.
        obs_noise_std: Not read by this function.
        action_noise_std: Not read by this function.
        params: Mapping that must provide ``'alg'`` and ``'env_name'``; it is
            also handed to ``make_class``. ``None`` is treated as an empty
            mapping.

    Returns:
        Whatever ``tune.run_experiments`` returns.

    Raises:
        KeyError: If ``params`` lacks ``'alg'`` or ``'env_name'``.
    """
    # ``None`` stands in for the former ``{}`` default so that no dict object
    # is shared between calls.
    if params is None:
        params = {}

    if smoke_test:
        grace_period = 1
        max_t = 5
        num_samples = 3
        num_cpu = 1
        num_gpu = 0
        num_total_cpu = 1
    else:
        grace_period = 5
        # this doesn't actually mean anything. Trainable takes care of killing
        # processes when they go on for too long
        max_t = 1e6 // 40
        num_samples = 30
        num_cpu = 1
        num_total_cpu = 12
        num_gpu = 0

    # Element 3 of the alg_to_config result is used as the search space.
    space = alg_to_config(params['alg'], params['env_name'])[3]

    bayes_opt = BayesOptSearch(
        space,
        max_concurrent=num_total_cpu,
        # metric="mean_loss",
        # mode="min",
        utility_kwargs={
            "kind": "ucb",
            "kappa": 2.5,
            "xi": 0.0
        })
    ahb = tune.schedulers.AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="success_rate",
        grace_period=grace_period,
        max_t=max_t)

    return tune.run_experiments(
        {
            "asynchyperband_test": {
                "run": make_class(params),
                "stop": {
                    "done": True
                },
                "num_samples": num_samples,
                "checkpoint_freq": 1,
                "resources_per_trial": {
                    "cpu": num_cpu,
                    "gpu": num_gpu,
                },
                "config": {
                    'iterations': 1e6 // 40
                }
            }
        },
        scheduler=ahb,
        search_alg=bayes_opt,
        queue_trials=False,
        verbose=0)
def tune_mnist():
    """Tune ``train_mnist`` with ASHA + BayesOptSearch and print the best result.

    Runs 50 samples maximizing ``mean_accuracy`` over uniform ranges for
    ``dropout``, ``lr``, ``momentum`` and ``hidden``, then prints the best
    configuration and its metric value.
    """
    metric = "mean_accuracy"
    asha = ASHAScheduler(time_attr="training_iteration")
    searcher = BayesOptSearch()

    search_space = {
        "dropout": tune.uniform(0.05, 0.5),
        "lr": tune.uniform(0.001, 0.1),
        "momentum": tune.uniform(0.1, 0.9),
        "hidden": tune.uniform(32, 512),
    }
    trial_resources = {"cpu": 1, "gpu": 0}

    analysis = tune.run(
        train_mnist,
        name="foo",
        scheduler=asha,
        search_alg=searcher,
        metric=metric,
        mode="max",
        # stop={
        #     "mean_accuracy": 0.99,
        #     "training_iteration": num_training_iterations
        # },
        num_samples=50,
        resources_per_trial=trial_resources,
        config=search_space)

    print("Best hyperparameters found were: ", analysis.best_config)
    print("Best value for", metric, ':', analysis.best_result[metric])
def main():
    """Start Ray and run ``easy_objective`` under BayesOpt + AsyncHyperBand.

    The search covers ``stop_param`` in (0, 10) and ``cutoff_param`` in
    (0.0005, 0.1), minimizing ``steps``; each trial receives
    ``{"iterations": 20}`` as its config.
    """
    # matrices = [self.env.create_instance_random(10) for _ in range(10)]
    ray.init()

    bounds = {
        # "k": ray.tune.sample_from([1,2,3,4]),
        "stop_param": (0, 10),
        "cutoff_param": (0.0005, 0.1)
    }
    utility = {"kind": "ucb", "kappa": 2.5, "xi": 0}

    algo = BayesOptSearch(
        bounds,
        max_concurrent=10,
        metric="steps",
        mode="min",
        utility_kwargs=utility)
    scheduler = AsyncHyperBandScheduler(metric="steps", mode="min")

    analysis = run(
        easy_objective,
        name="test1",
        search_alg=algo,
        scheduler=scheduler,
        config={"iterations": 20})
def _get_search_algorithm( self, search_algorithm, config_space, metric, mode, max_concurrent): if search_algorithm == "BO": algo = BayesOptSearch( utility_kwargs={ "kind": "ucb", "kappa": 2.5, "xi": 0.0 }) algo = ConcurrencyLimiter(algo, max_concurrent=max_concurrent) scheduler = AsyncHyperBandScheduler() elif search_algorithm == "BOHB": experiment_metrics = dict(metric=metric, mode=mode) algo = TuneBOHB( config_space, max_concurrent=max_concurrent, **experiment_metrics) scheduler = HyperBandForBOHB( time_attr="training_iteration", reduction_factor=4) elif search_algorithm == "PBT": # Problem of PBT: It mutates the param value, so sometimes, it generates unacceptable values algo = None scheduler = PopulationBasedTraining( time_attr='training_iteration', perturbation_interval=2, # Every N time_attr units, "perturb" the parameters. hyperparam_mutations=config_space) elif search_algorithm == "GRID" or search_algorithm == "RANDOM": algo = None scheduler = None else: raise Exception(search_algorithm, "is not available yet") return algo, scheduler
def set_basic_conf(self, analysis=None):
    """Build the BayesOptSearch and the cost function for this test.

    Args:
        analysis: Forwarded to ``BayesOptSearch`` as its ``analysis`` argument.

    Returns:
        A ``(search_alg, cost)`` tuple; ``cost`` reports ``loss`` computed
        from the ``height`` and ``width`` entries of the point it is given.
    """
    bounds = {"width": (0, 20), "height": (-100, 100)}

    def cost(point, reporter):
        height_term = (point["height"] - 14)**2
        width_term = abs(point["width"] - 3)
        reporter(loss=height_term - width_term)

    searcher = BayesOptSearch(
        bounds, metric="loss", mode="min", analysis=analysis)
    return searcher, cost
def compile(self,
            input_df,
            model_create_func,
            search_space,
            num_samples=1,
            stop=None,
            search_algorithm=None,
            search_algorithm_params=None,
            fixed_params=None,
            feature_transformers=None,
            # model=None,
            future_seq_len=1,
            validation_df=None,
            mc=False,
            metric="mse",
            metric_mode="min"):
    """
    Store the search settings on the engine and build its training function.

    :param input_df: forwarded to ``_prepare_train_func``
    :param model_create_func: forwarded to ``_prepare_train_func``
    :param search_space: converted by ``_prepare_tune_config`` and stored as
        ``self.search_space``
    :param num_samples: stored as ``self.num_samples``
    :param stop: stored as ``self.stop_criteria``
    :param search_algorithm: ``'BayesOpt'`` creates a ``BayesOptSearch``;
        any other value leaves ``self.search_algorithm`` as ``None``
    :param search_algorithm_params: mapping whose ``"utility_kwargs"`` entry
        is read when ``search_algorithm`` is ``'BayesOpt'``
    :param fixed_params: stored as ``self.fixed_params``
    :param feature_transformers: forwarded to ``_prepare_train_func``
    :param future_seq_len: forwarded to ``_prepare_train_func``
    :param validation_df: forwarded to ``_prepare_train_func``
    :param mc: forwarded to ``_prepare_train_func``
    :param metric: forwarded to ``_prepare_train_func``
    :param metric_mode: used as the ``BayesOptSearch`` mode and forwarded to
        ``_prepare_train_func``
    :return: None
    """
    self.search_space = self._prepare_tune_config(search_space)
    self.stop_criteria = stop
    self.num_samples = num_samples

    searcher = None
    if search_algorithm == 'BayesOpt':
        utility_kwargs = search_algorithm_params["utility_kwargs"]
        searcher = BayesOptSearch(
            self.search_space,
            metric="reward_metric",
            mode=metric_mode,
            utility_kwargs=utility_kwargs)
    self.search_algorithm = searcher

    self.fixed_params = fixed_params

    train_func_kwargs = {
        "input_df": input_df,
        "model_create_func": model_create_func,
        "feature_transformers": feature_transformers,
        "future_seq_len": future_seq_len,
        "validation_df": validation_df,
        "metric": metric,
        "metric_mode": metric_mode,
        "mc": mc,
        "remote_dir": self.remote_dir,
    }
    self.train_func = self._prepare_train_func(**train_func_kwargs)
def init_search_algorithm(search_alg, metric=None, mode=None):
    """Create the Ray Tune searcher named by ``search_alg``.

    The matching package must be pip-installed first. See
    https://docs.ray.io/en/master/tune/api_docs/suggestion.html

    ``'optuna'`` returns an ``OptunaSearch`` and ``'bayesopt'`` a
    ``BayesOptSearch``, both built with ``metric`` and ``mode`` (which must
    then be truthy). Any other value only logs a message and returns ``None``.
    """
    wants_optuna = search_alg == 'optuna'
    wants_bayesopt = search_alg == 'bayesopt'

    if wants_optuna:
        assert metric and mode, "Metric and mode cannot be None for optuna."
        from ray.tune.suggest.optuna import OptunaSearch
        return OptunaSearch(metric=metric, mode=mode)

    if wants_bayesopt:
        assert metric and mode, "Metric and mode cannot be None for bayesian optimization."
        from ray.tune.suggest.bayesopt import BayesOptSearch
        return BayesOptSearch(metric=metric, mode=mode)

    logging.info(f'{search_alg} search is found, run BasicVariantGenerator().')
    return None
def testBayesOpt(self):
    """Run ``_invalid_objective`` under BayesOptSearch and check the best trial.

    Asserts that the ``report`` entry of the best trial's config is at most 2.0
    after 8 samples in ``"max"`` mode.
    """
    from ray.tune.suggest.bayesopt import BayesOptSearch

    searcher = BayesOptSearch(random_state=1234)
    out = tune.run(
        _invalid_objective,
        # At least one nan, inf, -inf and float
        search_alg=searcher,
        config=self.config,
        mode="max",
        num_samples=8,
        reuse_actors=False)

    reported = out.best_trial.config["report"]
    self.assertLessEqual(reported, 2.0)
def set_basic_conf(self):
    """Build the BayesOptSearch and the cost function for this test.

    Returns:
        A ``(search_alg, cost)`` tuple; ``cost`` reports ``loss`` computed
        from the ``height`` and ``width`` entries of the point it is given.
    """
    bounds = {"width": (0, 20), "height": (-100, 100)}
    utility = {"kind": "ucb", "kappa": 2.5, "xi": 0.0}

    def cost(point, reporter):
        height_term = (point["height"] - 14)**2
        width_term = abs(point["width"] - 3)
        reporter(loss=height_term - width_term)

    searcher = BayesOptSearch(
        bounds,
        max_concurrent=1,
        metric="loss",
        mode="min",
        utility_kwargs=utility)
    return searcher, cost
def main(args, reproducible: bool):
    """Tune ``trainWithTune`` with BayesOptSearch + ASHA and print the best config.

    Args:
        args: Object whose ``dataPath`` attribute is passed to the data module.
        reproducible: When true, ``seed_everything(42)`` is called first.
    """
    if reproducible:
        seed_everything(42)

    datamodule = TwoDomainMMEDM(
        dataPath=args.dataPath, augment=True, batch_size=32, num_workers=8)

    hparam_names = ["log_lr", "log_lrRatio", "log_decay"]
    config = {
        "log_lr": tune.uniform(-4, -2),
        "log_lrRatio": tune.uniform(-3, 0),
        "log_decay": tune.uniform(-8, -1),
    }

    search_alg = BayesOptSearch(metric='mean_iou', mode='max')
    scheduler = ASHAScheduler(grace_period=25)
    reporter = CLIReporter(
        parameter_columns=hparam_names,
        metric_columns=["loss", "mean_iou", "training_iteration"])

    trainable = tune.with_parameters(
        trainWithTune,
        datamodule=datamodule,
        num_epochs=175,
        num_gpus=1)

    analysis = tune.run(
        trainable,
        resources_per_trial={"cpu": 5, "gpu": 0.5},
        metric="mean_iou",
        mode="max",
        config=config,
        num_samples=20,
        scheduler=scheduler,
        search_alg=search_alg,
        progress_reporter=reporter,
        name="tune_minimax_segmenter")

    print("Best hyperparameters found were: ", analysis.best_config)
def init_search_algorithm(search_alg, metric=None, mode=None):
    """Create the Ray Tune searcher named by ``search_alg``.

    The matching package must be pip-installed first. See
    https://docs.ray.io/en/master/tune/api_docs/suggestion.html

    Args:
        search_alg (str): ``'optuna'`` or ``'bayesopt'`` select a searcher;
            any other value only logs a message.
        metric (str): Metric name handed to the searcher; must be truthy for
            ``'optuna'`` and ``'bayesopt'``.
        mode (str): Mode handed to the searcher; must be truthy for
            ``'optuna'`` and ``'bayesopt'``.

    Returns:
        An ``OptunaSearch`` or ``BayesOptSearch`` instance, or ``None`` when
        ``search_alg`` is neither of the two names.
    """
    wants_optuna = search_alg == 'optuna'
    wants_bayesopt = search_alg == 'bayesopt'

    if wants_optuna:
        assert metric and mode, "Metric and mode cannot be None for optuna."
        from ray.tune.suggest.optuna import OptunaSearch
        return OptunaSearch(metric=metric, mode=mode)

    if wants_bayesopt:
        assert metric and mode, "Metric and mode cannot be None for bayesian optimization."
        from ray.tune.suggest.bayesopt import BayesOptSearch
        return BayesOptSearch(metric=metric, mode=mode)

    logging.info(f'{search_alg} search is found, run BasicVariantGenerator().')
    return None
def get_tuner(exp, alg, param_n):
    """Assemble the Tune settings for one experiment / algorithm combination.

    Args:
        exp: Experiment name. ``'EXP1'`` uses ``max_t=256``, reduction factor 4
            and ``'time_total_s'``; anything else uses ``max_t=27``, reduction
            factor 3 and ``'training_iteration'``. ``'EXP3'`` additionally
            restricts the space to the first ``param_n`` names of
            ``cf.param_priority``.
        alg: ``'BayesOpt'``, ``'HyperBand'`` or ``'Hybrid'``; other values
            leave both the scheduler and the search algorithm as ``None``.
        param_n: Number of prioritized parameters kept for ``'EXP3'``.

    Returns:
        Tuple ``(param_space, num_samples, stop, scheduler, search_alg)``.
    """
    # Work on a copy: the BayesOpt branch below replaces entries, and doing
    # that on cf.param_space itself would change the shared configuration for
    # every later call.
    param_space = dict(cf.param_space)

    if exp == 'EXP1':
        ### Experiment 1 ###
        max_t = 256
        reduction_factor = 4
        time_attr = 'time_total_s'
    else:
        ### Experiment 2 and 3 ###
        max_t = 27
        reduction_factor = 3
        time_attr = 'training_iteration'

    if exp == 'EXP3':
        ### Experiment 3 ###
        kept_names = cf.param_priority[:param_n]
        param_space = {
            k: v for k, v in cf.param_space.items() if k in kept_names
        }

    num_samples = int(
        util.calculate_total_iters_hyperband(reduction_factor, max_t)[0] /
        max_t)
    search_alg = None
    scheduler = None
    stop = {time_attr: max_t}

    if alg in ('BayesOpt', 'Hybrid'):
        for name in ('step', 'batch_size'):
            if name in param_space:
                param_space[name] = categorical_to_uniform(param_space[name])
        search_alg = BayesOptSearch(metric='mean_accuracy', mode='max')

    if alg in ('HyperBand', 'Hybrid'):
        scheduler = HyperBandScheduler(
            time_attr=time_attr,
            reduction_factor=reduction_factor,
            max_t=max_t)
        num_samples = int(
            util.calculate_total_iters_hyperband(reduction_factor, max_t)[1])

    return param_space, num_samples, stop, scheduler, search_alg
evaluation_strategy="epoch", # evaluate at the end of every epoch weight_decay=0.01, ) ## TODO: Initialize a transformers.Trainer object and run a Bayesian ## hyperparameter search for at least 5 trials (but not too many) on the ## learning rate. Hint: use the model_init() and ## compute_metrics() methods from finetuning_utils.py as arguments to ## Trainer(). Use the hp_space parameter in hyperparameter_search() to specify ## your hyperparameter search space. (Note that this parameter takes a function ## as its value.) ## Also print out the run ID, objective value, ## and hyperparameters of your best run.from ray import tune from ray import tune from ray.tune.suggest.bayesopt import BayesOptSearch testmodel = finetuning_utils.model_init() trainer = Trainer(model=testmodel, args=training_args, train_dataset=train_data, eval_dataset=val_data, tokenizer=tokenizer, compute_metrics=finetuning_utils.compute_metrics) bestrun = trainer.hyperparameter_search( hp_space=lambda _: {"learning_rate": tune.uniform(1e-5, 5e-5)}, mode="min", backend="ray", n_trials=5, # Choose among many libraries: # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html search_alg=BayesOptSearch(), compute_objective=finetuning_utils.my_funct) print(bestrun)
space (dict) –连续的搜索空间。参数将从该空间取样,用于运行试验。 max_concurrent (int) –最大同时试验次数。默认为10。 reward_attr (str) –训练结果目标值属性。这指的是一个递增的值。 utility_kwargs (dict) – 参数来定义实用函数。必须为键 kind、kappa和xi提供值。 其中 kind 只有三个选项: ucb,ei,poi 其中ucb和xi没有关系。 ucb: mean + kappa * std, ei: (mean - y_max - xi) * norm.cdf(z)/std) + std * norm.pdf(z),其中z = (mean - y_max - xi poi: (mean - y_max - xi) * norm.cdf(z) + std * norm.pdf(z) random_state (int) –用于初始化BayesOpt。 verbose (int) – 设置BayesOpt包的复杂级别。 """ algo = BayesOptSearch(space, max_concurrent=8, reward_attr="neg_mean_loss", utility_kwargs={ "kind": "ucb", "kappa": 2.5, "xi": 0.0 }, verbose=1) scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss", brackets=3) run(easy_objective, name="my_exp", search_alg=algo, scheduler=scheduler, **config)
def compile(self,
            input_df,
            model_create_func,
            search_space,
            recipe,
            feature_transformers=None,
            # model=None,
            future_seq_len=1,
            validation_df=None,
            mc=False,
            metric="mse",
            metric_mode="min"):
    """
    Configure the engine from a recipe and build its training function.

    :param input_df: forwarded to ``_prepare_train_func``
    :param model_create_func: forwarded to ``_prepare_train_func``
    :param search_space: converted by ``_prepare_tune_config`` and stored as
        ``self.search_space``
    :param recipe: object queried for ``runtime_params()``,
        ``search_algorithm_params()``, ``search_algorithm()``,
        ``fixed_params()``, ``scheduler_algorithm()`` and, for ``'SkOpt'``,
        ``opt_params()``
    :param feature_transformers: forwarded to ``_prepare_train_func``
    :param future_seq_len: forwarded to ``_prepare_train_func``
    :param validation_df: forwarded to ``_prepare_train_func``
    :param mc: forwarded to ``_prepare_train_func``
    :param metric: forwarded to ``_prepare_train_func``
    :param metric_mode: forwarded to ``_prepare_train_func``
    :return: None
    """
    # prepare parameters for search engine
    # Everything in the runtime params except 'num_samples' becomes the stop
    # criteria.
    stop = dict(recipe.runtime_params())
    num_samples = stop.pop('num_samples')

    search_algorithm_params = recipe.search_algorithm_params()
    search_algorithm = recipe.search_algorithm()
    fixed_params = recipe.fixed_params()
    schedule_algorithm = recipe.scheduler_algorithm()

    self.search_space = self._prepare_tune_config(search_space)
    self.stop_criteria = stop
    self.num_samples = num_samples

    if schedule_algorithm != 'AsyncHyperBand':
        from ray.tune.schedulers import FIFOScheduler
        self.sched = FIFOScheduler()
    else:
        from ray.tune.schedulers import AsyncHyperBandScheduler
        self.sched = AsyncHyperBandScheduler(
            time_attr="training_iteration",
            metric="reward_metric",
            mode="max",
            max_t=50,
            grace_period=1,
            reduction_factor=3,
            brackets=3,
        )

    searcher = None
    if search_algorithm == 'BayesOpt':
        searcher = BayesOptSearch(
            self.search_space,
            metric="reward_metric",
            mode="max",
            utility_kwargs=search_algorithm_params["utility_kwargs"])
    elif search_algorithm == 'SkOpt':
        from skopt import Optimizer
        from ray.tune.suggest.skopt import SkOptSearch
        optimizer = Optimizer(recipe.opt_params())
        searcher = SkOptSearch(
            optimizer,
            list(self.search_space.keys()),
            metric="reward_metric",
            mode="max",
        )
    self.search_algorithm = searcher

    self.fixed_params = fixed_params

    train_func_kwargs = {
        "input_df": input_df,
        "model_create_func": model_create_func,
        "feature_transformers": feature_transformers,
        "future_seq_len": future_seq_len,
        "validation_df": validation_df,
        "metric": metric,
        "metric_mode": metric_mode,
        "mc": mc,
        "remote_dir": self.remote_dir,
    }
    self.train_func = self._prepare_train_func(**train_func_kwargs)
def distributed_bo(loss: Callable,
                   space: Dict,
                   metric: str,
                   mode: str = "min",
                   patience: int = 5,
                   name: str = "gaussian_process",
                   random_search_steps: int = 5,
                   bo_steps: int = 500,
                   resources_per_trial: Dict = None,
                   config: Dict = None):
    """Run a Bayesian-optimization search over ``space`` through ``tune.run``.

    Parameters
    --------------------
    loss: Callable,
        Trainable handed to ``tune.run``.
    space: Dict,
        The space of parameters given to ``BayesOptSearch``.
    metric: str,
        Metric name used by the scheduler, the searcher and early stopping.
    mode: str = "min",
        The optimization direction.
    patience: int = 5,
        Patience given to ``EarlyStopping``.
    name: str = "gaussian_process",
        Experiment name; also used as ``local_dir``.
    random_search_steps: int = 5,
        Number of initial random search steps given to ``BayesOptSearch``.
    bo_steps: int = 500,
        Added to ``random_search_steps`` to obtain ``num_samples``.
    resources_per_trial: Dict = None,
        Resources per trial, by default: {"cpu": 1, "gpu": 0}
    config: Dict = None,
        Configuration passed to ``tune.run``, by default an empty dict.

    Returns
    --------------------
    Whatever ``tune.run`` returns.
    """
    config = {} if config is None else config
    resources_per_trial = ({
        "cpu": 1,
        "gpu": 0
    } if resources_per_trial is None else resources_per_trial)

    # Scheduler for the experiments
    scheduler = AsyncHyperBandScheduler(
        time_attr="training_iteration", metric=metric, mode=mode)

    # Following bayesian optimization
    searcher = BayesOptSearch(
        space,
        metric=metric,
        mode=mode,
        random_search_steps=random_search_steps)

    total_samples = bo_steps + random_search_steps  # Number of iterations

    # Execution of the BO.
    return tune.run(
        loss,
        name=name,
        stop=EarlyStopping(metric, mode=mode, patience=patience),
        local_dir=name,
        scheduler=scheduler,
        search_alg=searcher,
        config=config,
        num_samples=total_samples,
        resources_per_trial=resources_per_trial,
        raise_on_failed_trial=False,
        verbose=0)
"qlograndint": tune.qlograndint(1, 10, 2), # Round to increments of 2 "choice": tune.choice(["a", "b", "c"]), # Choose one of these options uniformly "func": tune.sample_from( lambda spec: spec.config.uniform * 0.01), # Depends on other value "grid": tune.grid_search([32, 64, 128]), # Search over all these values } # __config_end__ # __bayes_start__ from ray.tune.suggest.bayesopt import BayesOptSearch # Define the search space search_space = {"a": tune.uniform(0, 1), "b": tune.uniform(0, 20)} algo = BayesOptSearch(random_search_steps=4) tune.run( trainable, config=search_space, metric="score", mode="min", search_alg=algo, stop={"training_iteration": 20}, ) # __bayes_end__ # __hyperband_start__ from ray.tune.schedulers import HyperBandScheduler # Create HyperBand scheduler and minimize the score
def compile(self,
            input_df,
            search_space,
            num_samples=1,
            stop=None,
            search_algorithm=None,
            search_algorithm_params=None,
            fixed_params=None,
            feature_transformers=None,
            # model=None,
            future_seq_len=1,
            validation_df=None,
            mc=False,
            metric="mean_squared_error"):
    """
    Store the search settings on the engine and build its training function.

    :param input_df: forwarded to ``_prepare_train_func``
    :param search_space: converted by ``_prepare_tune_config`` and stored as
        ``self.search_space``
    :param num_samples: stored as ``self.num_samples``
    :param stop: stored as ``self.stop_criteria``
    :param search_algorithm: ``'BayesOpt'`` creates a ``BayesOptSearch``;
        any other value leaves ``self.search_algorithm`` as ``None``
    :param search_algorithm_params: mapping whose ``"utility_kwargs"`` entry
        is read when ``search_algorithm`` is ``'BayesOpt'``
    :param fixed_params: stored as ``self.fixed_params``
    :param feature_transformers: forwarded to ``_prepare_train_func``
    :param future_seq_len: forwarded to ``_prepare_train_func``
    :param validation_df: forwarded to ``_prepare_train_func``
    :param mc: forwarded to ``_prepare_train_func``
    :param metric: ``"mse"`` (or its long form ``"mean_squared_error"``, the
        default) or ``"r2"``
    :return: None
    :raises ValueError: if ``metric`` is none of the accepted names
    """
    self.search_space = self._prepare_tune_config(search_space)
    self.stop_criteria = stop
    self.num_samples = num_samples

    # The signature default is "mean_squared_error", which the original check
    # rejected, so a call relying on the default always failed. It is treated
    # as the long spelling of "mse".
    if metric in ("mse", "mean_squared_error"):
        # mode = "min"
        metric_op = -1
    elif metric == "r2":
        # mode = "max"
        metric_op = 1
    else:
        raise ValueError("metric can only be \"mse\" or \"r2\"")

    if search_algorithm == 'BayesOpt':
        # ray version 0.7.2
        self.search_algorithm = BayesOptSearch(
            self.search_space,
            reward_attr="reward_metric",
            utility_kwargs=search_algorithm_params["utility_kwargs"]
        )
        # ray version 0.7.3
        # self.search_algorithm = BayesOptSearch(
        #     self.search_space,
        #     metric="reward_metric",
        #     mode=mode,
        #     utility_kwargs=search_algorithm_params["utility_kwargs"]
        # )
    else:
        self.search_algorithm = None

    self.fixed_params = fixed_params
    self.train_func = self._prepare_train_func(input_df,
                                               feature_transformers,
                                               # model,
                                               future_seq_len,
                                               validation_df,
                                               metric_op,
                                               mc,
                                               self.remote_dir)
def main(args):
    """Grid-search two loss scales with ASHA and print the best configuration.

    Args:
        args: Namespace providing ``params`` (a string parsed with
            ``yaml.safe_load`` after ``'#'`` is replaced by ``','``),
            ``logdir``, ``num_runs``, ``ping_every`` and ``resume_runs``.
    """

    def trainable(config):
        """Run one trial and report every test score above 1.0 as ``mean_score``."""
        print('begin a trial')
        args.params = tools.AttrDict(yaml.safe_load(args.params.replace('#', ',')))
        args.logdir = args.logdir and os.path.expanduser(args.logdir)
        print('debug ', config["divergence_scale"], config["reward_loss_scale"])
        with args.params.unlocked:
            args.params.divergence_scale = config["divergence_scale"]
            args.params.reward_loss_scale = config["reward_loss_scale"]
            # args.params.main_learning_rate = config["main_learning_rate"]
            args.params.test_steps = 50
            # args.params.num_units = config['num_units']
            args.params.test_traj = 5
        training.utility.set_up_logging()
        experiment = training.Experiment(
            args.logdir,
            process_fn=functools.partial(process, args=args),
            num_runs=args.num_runs,
            ping_every=args.ping_every,
            resume_runs=args.resume_runs)
        for run in experiment:
            for test_score in run:
                if test_score > 1.0:
                    tune.report(mean_score=test_score)
            # NOTE(review): the original nesting of this `break` is not
            # recoverable from the source as received; it is placed so that
            # only the first run of the experiment is consumed -- confirm.
            break

    import ConfigSpace as CS
    import ConfigSpace.hyperparameters as CSH

    # search = {
    #     "divergence_scale": tune.quniform(1, 30, 1),
    #     "reward_loss_scale": tune.quniform(1, 50, 1),
    # }
    search = {
        "divergence_scale": tune.grid_search([0.1, 1, 2, 3, 5, 10]),
        "reward_loss_scale": tune.grid_search([1, 2, 5, 10, 20]),
    }

    # The ConfigSpace / BOHB / BayesOpt objects below are constructed but not
    # passed to tune.run (search_alg is commented out there).
    config_space = CS.ConfigurationSpace(seed=1234)
    config_space.add_hyperparameter(
        CSH.UniformIntegerHyperparameter(name="divergence_scale", lower=1, upper=30))
    config_space.add_hyperparameter(
        CSH.UniformIntegerHyperparameter(name="reward_loss_scale", lower=1, upper=50))
    # config_space.add_hyperparameter(
    #     CSH.UniformFloatHyperparameter("main_learning_rate", lower=0.0001, upper=0.05, log=True))
    config_space.add_hyperparameter(
        CSH.UniformIntegerHyperparameter("main_learning_rate", lower=1, upper=500, log=True))
    config_space.add_hyperparameter(
        CSH.UniformIntegerHyperparameter(name="num_units", lower=150, upper=400, q=50))

    bayesopt = BayesOptSearch(metric="mean_loss", mode="min")
    bohb_hyperband = HyperBandForBOHB(
        metric="mean_score",
        mode="max",
        time_attr="training_iteration",
        max_t=30,
        reduction_factor=3)
    bohb_search = TuneBOHB(
        space=config_space, max_concurrent=1, metric="mean_score", mode="max")
    bayesopt = BayesOptSearch(max_concurrent=3, metric="mean_score", mode="max")
    asha = ASHAScheduler(
        metric="mean_score", mode="max", grace_period=6, reduction_factor=3)

    analysis = tune.run(
        trainable,
        config=search,
        num_samples=3,
        scheduler=asha,
        resources_per_trial={"cpu": 16, "gpu": 1},
        stop={"training_iteration": 13},
        # search_alg=bayesopt,
        log_to_file=True
    )
    df = analysis.results_df
    # mean_score is maximized by the scheduler above, so the best config must
    # be selected with mode="max"; the former "min" printed the lowest-scoring
    # configuration under the "Best config" label.
    print("Best config: ", analysis.get_best_config(
        metric="mean_score", mode="max"))
    print(df)
def search_neurons():
    """Tune ``search_neural_arch`` and return the best model, loaded and tested.

    Runs ``tune.run`` with ASHA + a concurrency-limited BayesOptSearch that
    maximize ``accuracy``, prints the best trial's config, loss and accuracy,
    rebuilds a ``Net`` from the best trial's checkpoint, prints its test
    accuracy and returns it.

    Returns:
        The ``Net`` instance restored from the best trial's checkpoint, moved
        to ``"cuda"`` when available and to ``"cpu"`` otherwise.
    """
    neuron_config_space = search_training_hyperparameters()
    experiment_metrics = {"metric": "accuracy", "mode": "max"}
    hpn = list(neuron_config_space.keys())

    # pre-load data to avoid races
    load_data()

    scheduler = ASHAScheduler(
        max_t=oom,
        reduction_factor=2,
        # grace_period=3,
        **experiment_metrics)
    search = ConcurrencyLimiter(
        BayesOptSearch(**experiment_metrics),
        max_concurrent=max_concurrent_trials)
    reporter = JupyterNotebookReporter(
        overwrite=True,
        parameter_columns=hpn,
        # max_progress_rows=num_samples,
        max_report_frequency=10,
        **experiment_metrics)

    trial_resources = {"cpu": cpu_use, "gpu": gpu_use}
    result = tune.run(
        search_neural_arch,
        verbose=3,
        name="neurons",
        local_dir=r.absolute(),
        resources_per_trial=trial_resources,
        max_failures=3,
        num_samples=num_samples,
        config=neuron_config_space,
        scheduler=scheduler,
        search_alg=search,
        queue_trials=True,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("accuracy", "max", "last")
    escape_pod = best_trial  # local alias, not read again in this function
    last_result = best_trial.last_result
    print("Best training hyperparameters: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        last_result["accuracy"]))

    checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")
    first, second = torch.load(checkpoint_path)
    # When the second element is itself a tuple it holds both states;
    # otherwise the two loaded elements are the states.
    if type(second) == tuple:
        arch_state, model_state = second
    else:
        arch_state, model_state = first, second

    best_trained_model = Net(arch_state)
    best_trained_model.load_state_dict(model_state)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    best_trained_model.to(device)

    test_acc = test_accuracy(best_trained_model, device)
    print("Best trial test set accuracy: {}".format(test_acc))
    return best_trained_model
def search(runner, conf_dir_file):
    """Run a BayesOpt + AsyncHyperBand search over model hyperparameters.

    Sets the CUDA environment variables, starts Ray, runs ``tune.run`` on the
    nested ``train`` function for 100 samples and prints the best config.

    Args:
        runner: Not read by the body shown here; the nested ``train_once``
            takes its own ``runner`` argument.
        conf_dir_file: Config file name handed to every trial through the
            ``conf_dir_file`` entry of the Tune config.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost -- confirm loop/branch membership against the
    original file.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1"

    def train_once(runner):
        """Train ``runner.model`` for ``runner.train_epochs`` epochs, printing averages."""
        for epoch in range(runner.train_epochs):
            # Fresh running averages for every epoch.
            losses = AverageMeter()
            top1 = AverageMeter()
            top5 = AverageMeter()
            runner.model.train()
            for i, data in enumerate(runner.tr_loader):
                images, labels = data
                # NOTE(review): `conf` is not defined in this function;
                # presumably a module-level config object -- confirm.
                if conf.get()['cuda']['avail']:
                    images, labels = images.to(runner.device), labels.to(
                        runner.device)
                    runner.model = runner.model.to(runner.device)
                runner.optimizer.zero_grad()
                outputs, loss = runner.regularizer(images, labels)
                loss.backward()
                runner.optimizer.step()
                ttop1, ttop5 = runner.accuracy(outputs, labels, (1, 5))
                # Each update is weighted by images.size(0).
                losses.update(loss.item(), images.size(0))
                top1.update(ttop1.item(), images.size(0))
                top5.update(ttop5.item(), images.size(0))
            print(
                '[{:d}/{:d}] <<<TRAIN>>> lr({:.10f}) loss({:.4f}) top1({:.3f}) top5({:.3f})'
                .format(epoch + 1, runner.train_epochs,
                        runner.optimizer.param_groups[0]['lr'], losses.avg,
                        top1.avg, top5.avg))
            runner.scheduler.step()

    def train(config):
        """Tune trainable: build a Runner from ``config``, train it, report accuracy."""
        conf_dir_file = config['conf_dir_file']
        my_conf = Config(filename=conf_dir_file)
        # Every sampled value except the file name overrides a 'model' entry.
        for key, value in config.items():
            if key != 'conf_dir_file':
                my_conf.get()['model'][key] = value
        my_conf.get()['model']['name'] = 'rexnetv1_search'
        # The entries below are converted with int()/round() before use.
        my_conf.get()['model']['input_ch'] = int(
            my_conf.get()['model']['input_ch'])
        my_conf.get()['model']['final_ch'] = int(
            my_conf.get()['model']['final_ch'])
        my_conf.get()['model']['use_se'] = round(
            my_conf.get()['model']['use_se'])
        my_conf.get()['model']['se_ratio'] = int(
            my_conf.get()['model']['se_ratio'])
        # my_conf.get()['model']['lr'] = config['lr']
        # my_conf.get()['optimizer']['name'] = config['optimizer']
        # my_conf.get()['scheduler']['name'] = config['scheduler']
        # my_conf.get()['model']['config'] = np.array(config['network_block_cfg']).reshape(-1,4).tolist()
        my_runner = Runner(my_conf)
        train_once(my_runner)
        my_mean_accuracy = my_runner.best_acc_top1
        tune.report(mean_accuracy=my_mean_accuracy)

    ray.init(configure_logging=False)
    # (lower, upper) pair per searched parameter, handed to BayesOptSearch.
    search_config = {
        # "conf_dir_file": hp.choice('conf_dir_file', [conf_dir_file]),
        "input_ch": (16, 32),
        "final_ch": (180, 320),
        "width_mult": (1.0, 3.0),
        "depth_mult": (1.0, 3.0),
        "use_se": (False, True),
        "se_ratio": (6, 24),
        "dropout_ratio": (0.1, 0.5),
        # "bn_momentum": (0.1, 0.9),
        "lr": (0.001, 0.125)
        # "optimizer": tune.choice(['SGD','SGDP','Adam','AdamP']),
        # "scheduler": tune.choice(['CosineAnnealingLR','MultiStepLR']),
        # "network_block_cfg": tune.grid_search([
        #     [2.5, 20, 2, 1,
        #      2.5, 36, 1, 2,
        #      2.5, 36, 1, 1,
        #      2.5, 56, 3, 1,
        #      2.5, 80, 1, 2,
        #      2.5, 80, 4, 1,
        #      2.5, 88, 1, 2,
        #      2.5, 96, 2, 1,
        #      2.5, 114, 1, 1],
        #     [3, 16, 2, 1,
        #      3, 32, 1, 2,
        #      3, 32, 1, 1,
        #      3, 48, 3, 1,
        #      3, 72, 1, 2,
        #      3, 72, 4, 1,
        #      3, 80, 1, 2,
        #      3, 88, 2, 1,
        #      3, 106, 1, 1]
        # ])
    }
    # Extra keyword arguments for tune.run.
    bo_config = {
        "num_samples": 100,
        "config": {
            'conf_dir_file': conf_dir_file,
        }
    }
    algo = BayesOptSearch(search_config,
                          max_concurrent=1,
                          metric="mean_accuracy",
                          mode="max",
                          utility_kwargs={
                              "kind": "ucb",
                              "kappa": 2.5,
                              "xi": 0.0
                          })
    scheduler = AsyncHyperBandScheduler(metric='mean_accuracy', mode='max')
    # NOTE(review): `train` reports only mean_accuracy; confirm that
    # "train_epoch" is a result key Tune actually receives.
    analysis = tune.run(train,
                        scheduler=scheduler,
                        search_alg=algo,
                        resources_per_trial={'gpu': 1},
                        stop={"train_epoch": 3},
                        **bo_config)
    print("Best config: ", analysis.get_best_config(metric="mean_accuracy"))
    print('archtecture_search() Done.')
def hparams(algorithm, scheduler, num_samples, tensorboard, bare):
    """Search TPM hyperparameters (update rule, K, N, L) with Ray Tune.

    Args:
        algorithm: Name of the search algorithm: 'hyperopt', 'bayesopt',
            'nevergrad', 'skopt', 'dragonfly', 'bohb' or 'zoopt'.
        scheduler: Name of the trial scheduler: 'fifo', 'pbt', 'ahb',
            'asha', 'hb', 'bohb' or 'msr'. Ignored when ``algorithm`` is
            'bohb', which always uses ``HyperBandForBOHB``.
        num_samples: Forwarded to ``tune.run`` as ``num_samples``.
        tensorboard: Exported as ``MLENCRYPT_TB``; when truthy, every attack
            run executes under its own summary file writer.
        bare: Exported as ``MLENCRYPT_BARE``.

    Raises:
        ValueError: If TensorBoard logging and bare mode are both enabled,
            or if ``algorithm`` / ``scheduler`` is not recognised.
    """
    from glob import glob

    import tensorflow.summary
    from tensorflow import random as tfrandom, int64 as tfint64
    from ray import init as init_ray, shutdown as shutdown_ray
    from ray import tune
    from wandb.ray import WandbLogger
    from wandb import sweep as wandbsweep
    from wandb.apis import CommError as wandbCommError

    # less summaries are logged if MLENCRYPT_TB is TRUE (for efficiency)
    # TODO: use tf.summary.record_if?
    environ["MLENCRYPT_TB"] = str(tensorboard).upper()
    environ["MLENCRYPT_BARE"] = str(bare).upper()
    if getenv('MLENCRYPT_TB', 'FALSE') == 'TRUE' and \
            getenv('MLENCRYPT_BARE', 'FALSE') == 'TRUE':
        raise ValueError('TensorBoard logging cannot be enabled in bare mode.')

    logdir = f'logs/hparams/{datetime.now()}'

    # "These results show that K = 3 is the optimal choice for the
    # cryptographic application of neural synchronization. K = 1 and K = 2 are
    # too insecure in regard to the geometric attack. And for K > 3 the effort
    # of A and B grows exponentially with increasing L, while the simple attack
    # is quite successful in the limit K -> infinity. Consequently, one should
    # only use Tree Parity Machines with three hidden units for the neural
    # key-exchange protocol." (Ruttor, 2006)
    # https://arxiv.org/pdf/0711.2411.pdf#page=59

    update_rules = [
        'random-same',
        # 'random-different-A-B-E', 'random-different-A-B',
        'hebbian',
        'anti_hebbian',
        'random_walk'
    ]
    K_bounds = {'min': 4, 'max': 8}
    N_bounds = {'min': 4, 'max': 8}
    L_bounds = {'min': 4, 'max': 8}

    # TODO: don't use *_bounds.values() since .values doesn't preserve order

    def get_session_num(logdir):
        """Return the next unused ``run-<n>`` index under ``logdir``."""
        # glob() yields paths in arbitrary order (and "run-10" sorts before
        # "run-9" as text), so take the numeric maximum rather than the last
        # entry. With no existing runs this starts at 0.
        session_nums = (int(run_path.split('-')[-1])
                        for run_path in glob(join(logdir, "run-*")))
        return max(session_nums, default=-1) + 1

    def trainable(config, reporter):
        """
        Args:
            config (dict): Parameters provided from the search algorithm
                or variant generation.
            reporter: Callback used to report the aggregated metrics.
        """
        if not isinstance(config['update_rule'], str):
            # Index-based searchers (e.g. bayesopt) may return the inclusive
            # upper bound len(update_rules); clamp so it stays a valid index.
            rule_index = min(int(config['update_rule']),
                             len(update_rules) - 1)
            update_rule = update_rules[rule_index]
        else:
            update_rule = config['update_rule']
        K, N, L = int(config['K']), int(config['N']), int(config['L'])

        run_name = f"run-{get_session_num(logdir)}"
        run_logdir = join(logdir, run_name)
        # for each attack, the TPMs should start with the same weights
        initial_weights_tensors = get_initial_weights(K, N, L)
        training_steps_ls = {}
        eve_scores_ls = {}
        losses_ls = {}
        # for each attack, the TPMs should use the same inputs
        seed = tfrandom.uniform([],
                                minval=0,
                                maxval=tfint64.max,
                                dtype=tfint64).numpy()
        for attack in ['none', 'geometric']:
            initial_weights = {
                tpm: weights_tensor_to_variable(weights, tpm)
                for tpm, weights in initial_weights_tensors.items()
            }
            tfrandom.set_seed(seed)

            if tensorboard:
                attack_logdir = join(run_logdir, attack)
                attack_writer = tensorflow.summary.create_file_writer(
                    attack_logdir)
                with attack_writer.as_default():
                    training_steps, sync_scores, loss = run(
                        update_rule, K, N, L, attack, initial_weights)
            else:
                training_steps, sync_scores, loss = run(
                    update_rule, K, N, L, attack, initial_weights)
            training_steps_ls[attack] = training_steps
            eve_scores_ls[attack] = sync_scores
            losses_ls[attack] = loss
        avg_training_steps = tensorflow.math.reduce_mean(
            list(training_steps_ls.values()))
        avg_eve_score = tensorflow.math.reduce_mean(
            list(eve_scores_ls.values()))
        mean_loss = tensorflow.math.reduce_mean(list(losses_ls.values()))
        reporter(
            avg_training_steps=avg_training_steps.numpy(),
            avg_eve_score=avg_eve_score.numpy(),
            mean_loss=mean_loss.numpy(),
            done=True,
        )

    if algorithm == 'hyperopt':
        from hyperopt import hp as hyperopt
        from hyperopt.pyll.base import scope
        from ray.tune.suggest.hyperopt import HyperOptSearch
        space = {
            'update_rule': hyperopt.choice(
                'update_rule',
                update_rules,
            ),
            'K': scope.int(hyperopt.quniform('K', *K_bounds.values(), q=1)),
            'N': scope.int(hyperopt.quniform('N', *N_bounds.values(), q=1)),
            'L': scope.int(hyperopt.quniform('L', *L_bounds.values(), q=1)),
        }
        algo = HyperOptSearch(
            space,
            metric='mean_loss',
            mode='min',
            points_to_evaluate=[
                {
                    'update_rule': 0,
                    'K': 3,
                    'N': 16,
                    'L': 8
                },
                {
                    'update_rule': 0,
                    'K': 8,
                    'N': 16,
                    'L': 8
                },
                {
                    'update_rule': 0,
                    'K': 8,
                    'N': 16,
                    'L': 128
                },
            ],
        )
    elif algorithm == 'bayesopt':
        from ray.tune.suggest.bayesopt import BayesOptSearch
        space = {
            'update_rule': (0, len(update_rules)),
            'K': tuple(K_bounds.values()),
            'N': tuple(N_bounds.values()),
            'L': tuple(L_bounds.values()),
        }
        algo = BayesOptSearch(
            space,
            metric="mean_loss",
            mode="min",
            # TODO: what is utility_kwargs for and why is it needed?
            utility_kwargs={
                "kind": "ucb",
                "kappa": 2.5,
                "xi": 0.0
            })
    elif algorithm == 'nevergrad':
        from ray.tune.suggest.nevergrad import NevergradSearch
        from nevergrad import optimizers
        from nevergrad import p as ngp
        algo = NevergradSearch(
            optimizers.TwoPointsDE(
                ngp.Instrumentation(
                    update_rule=ngp.Choice(update_rules),
                    K=ngp.Scalar(lower=K_bounds['min'],
                                 upper=K_bounds['max']).set_integer_casting(),
                    N=ngp.Scalar(lower=N_bounds['min'],
                                 upper=N_bounds['max']).set_integer_casting(),
                    L=ngp.Scalar(lower=L_bounds['min'],
                                 upper=L_bounds['max']).set_integer_casting(),
                )),
            None,  # since the optimizer is already instrumented with kwargs
            metric="mean_loss",
            mode="min")
    elif algorithm == 'skopt':
        from skopt import Optimizer
        from ray.tune.suggest.skopt import SkOptSearch
        optimizer = Optimizer([
            update_rules,
            tuple(K_bounds.values()),
            tuple(N_bounds.values()),
            tuple(L_bounds.values())
        ])
        algo = SkOptSearch(
            optimizer,
            ["update_rule", "K", "N", "L"],
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'dragonfly':
        # TODO: doesn't work
        from ray.tune.suggest.dragonfly import DragonflySearch
        from dragonfly.exd.experiment_caller import EuclideanFunctionCaller
        from dragonfly.opt.gp_bandit import EuclideanGPBandit
        # from dragonfly.exd.experiment_caller import CPFunctionCaller
        # from dragonfly.opt.gp_bandit import CPGPBandit
        from dragonfly import load_config

        domain_config = load_config({
            "domain": [
                {
                    "name": "update_rule",
                    "type": "discrete",
                    "dim": 1,
                    "items": update_rules
                },
                {
                    "name": "K",
                    "type": "int",
                    "min": K_bounds['min'],
                    "max": K_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "N",
                    "type": "int",
                    "min": N_bounds['min'],
                    "max": N_bounds['max'],
                    # "dim": 1
                },
                {
                    "name": "L",
                    "type": "int",
                    "min": L_bounds['min'],
                    "max": L_bounds['max'],
                    # "dim": 1
                }
            ]
        })
        func_caller = EuclideanFunctionCaller(
            None, domain_config.domain.list_of_domains[0])
        optimizer = EuclideanGPBandit(func_caller, ask_tell_mode=True)
        algo = DragonflySearch(
            optimizer,
            metric="mean_loss",
            mode="min",
            points_to_evaluate=[
                ['random-same', 3, 16, 8],
                ['random-same', 8, 16, 8],
                ['random-same', 8, 16, 128],
            ],
        )
    elif algorithm == 'bohb':
        from ConfigSpace import ConfigurationSpace
        from ConfigSpace import hyperparameters as CSH
        from ray.tune.suggest.bohb import TuneBOHB
        config_space = ConfigurationSpace()
        config_space.add_hyperparameter(
            CSH.CategoricalHyperparameter("update_rule",
                                          choices=update_rules))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='K',
                                             lower=K_bounds['min'],
                                             upper=K_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='N',
                                             lower=N_bounds['min'],
                                             upper=N_bounds['max']))
        config_space.add_hyperparameter(
            CSH.UniformIntegerHyperparameter(name='L',
                                             lower=L_bounds['min'],
                                             upper=L_bounds['max']))
        algo = TuneBOHB(config_space, metric="mean_loss", mode="min")
    elif algorithm == 'zoopt':
        from ray.tune.suggest.zoopt import ZOOptSearch
        from zoopt import ValueType
        space = {
            "update_rule":
            (ValueType.DISCRETE, range(0, len(update_rules)), False),
            "K": (ValueType.DISCRETE,
                  range(K_bounds['min'], K_bounds['max'] + 1), True),
            "N": (ValueType.DISCRETE,
                  range(N_bounds['min'], N_bounds['max'] + 1), True),
            "L": (ValueType.DISCRETE,
                  range(L_bounds['min'], L_bounds['max'] + 1), True),
        }
        # TODO: change budget to a large value
        algo = ZOOptSearch(budget=10,
                           dim_dict=space,
                           metric="mean_loss",
                           mode="min")
    else:
        # Fail before Ray is initialised instead of with a NameError later.
        raise ValueError(f"Unknown search algorithm: {algorithm!r}")

    # TODO: use more appropriate arguments for schedulers:
    # https://docs.ray.io/en/master/tune/api_docs/schedulers.html
    if algorithm == 'bohb' or scheduler == 'bohb':
        # Checked first: the BOHB searcher is meant to be paired with this
        # scheduler, so the requested scheduler must not shadow this branch.
        from ray.tune.schedulers import HyperBandForBOHB
        sched = HyperBandForBOHB(metric="mean_loss", mode="min")
    elif scheduler == 'fifo':
        sched = None  # Tune defaults to FIFO
    elif scheduler == 'pbt':
        from ray.tune.schedulers import PopulationBasedTraining
        from random import randint
        sched = PopulationBasedTraining(
            metric="mean_loss",
            mode="min",
            hyperparam_mutations={
                "update_rule": update_rules,
                "K": lambda: randint(K_bounds['min'], K_bounds['max']),
                "N": lambda: randint(N_bounds['min'], N_bounds['max']),
                "L": lambda: randint(L_bounds['min'], L_bounds['max']),
            })
    elif scheduler == 'ahb' or scheduler == 'asha':
        # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html#asha-tune-schedulers-ashascheduler
        from ray.tune.schedulers import AsyncHyperBandScheduler
        sched = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    elif scheduler == 'hb':
        from ray.tune.schedulers import HyperBandScheduler
        sched = HyperBandScheduler(metric="mean_loss", mode="min")
    elif scheduler == 'msr':
        from ray.tune.schedulers import MedianStoppingRule
        sched = MedianStoppingRule(metric="mean_loss", mode="min")
    else:
        raise ValueError(f"Unknown scheduler: {scheduler!r}")

    init_ray(
        address=getenv("ip_head"),
        redis_password=getenv('redis_password'),
    )
    try:
        analysis = tune.run(
            trainable,
            name='mlencrypt_research',
            config={
                "monitor": True,
                "env_config": {
                    "wandb": {
                        "project": "mlencrypt-research",
                        "sync_tensorboard": True,
                    },
                },
            },
            # resources_per_trial={"cpu": 1, "gpu": 3},
            local_dir='./ray_results',
            export_formats=['csv'],  # TODO: add other formats?
            num_samples=num_samples,
            loggers=[
                tune.logger.JsonLogger, tune.logger.CSVLogger,
                tune.logger.TBXLogger, WandbLogger
            ],
            search_alg=algo,
            scheduler=sched,
            queue_trials=True,
        )
        try:
            wandbsweep(analysis)
        except wandbCommError:
            # see https://docs.wandb.com/sweeps/ray-tune#feature-compatibility
            pass
        best_config = analysis.get_best_config(metric='mean_loss',
                                               mode='min')
        print(f"Best config: {best_config}")
    finally:
        # Always release the Ray connection, even when the run fails.
        shutdown_ray()
parser.add_argument("--server-address", type=str, default=None, required=False, help="The address of server to connect to if using " "Ray Client.") args, _ = parser.parse_known_args() if args.server_address: import ray ray.util.connect(args.server_address) algo = BayesOptSearch(utility_kwargs={ "kind": "ucb", "kappa": 2.5, "xi": 0.0 }) algo = ConcurrencyLimiter(algo, max_concurrent=4) scheduler = AsyncHyperBandScheduler() analysis = tune.run(easy_objective, name="my_exp", metric="mean_loss", mode="min", search_alg=algo, scheduler=scheduler, num_samples=10 if args.smoke_test else 1000, config={ "steps": 100, "width": tune.uniform(0, 20), "height": tune.uniform(-100, 100)
def main():
    """Tune lr, momentum and warm-restart period with BayesOptSearch.

    Parses the command line, merges the selected config file and overrides
    into the global ``cfg``, then runs a two-sample Ray Tune experiment that
    maximises ``mean_accuracy`` and prints the best configuration found.
    With ``--list`` it prints the registered factory names and exits instead.
    """
    parser = argparse.ArgumentParser(description="PyTorch Deep Learning")
    parser.add_argument("--config",
                        default="",
                        help="path to config file",
                        type=str)
    parser.add_argument('--list',
                        action='store_true',
                        help='list available config in factories')
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()

    if args.list:
        from database.data_factory import get_names as data_names
        from database.dataset_factory import get_names as dataset_names
        from database.loader_factory import get_names as loader_names
        from manager.manager_factory import get_names as manager_names
        from engine.engine_factory import get_names as engine_names
        print("DATA: ", data_names())
        print("DATASET: ", dataset_names())
        print("LOADER: ", loader_names())
        print("MANAGER: ", manager_names())
        print("ENGINE: ", engine_names())
        # NOTE(review): exits with status 1 although listing succeeded;
        # kept as-is because callers may depend on the exit code.
        sys.exit(1)

    if args.config != "":
        cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    deploy_macro(cfg)

    def trial_str_creator(trial):
        """Name a trial after its trainable and trial id."""
        return f"{trial.trainable_name}_{trial.trial_id}"

    def train_with_tune(config, reporter):
        """Tune trainable: apply sampled values to ``cfg``, train, report."""
        build_output(cfg, args.config)
        logger = setup_logger(cfg.OUTPUT_DIR)
        logger.info(cfg.OUTPUT_DIR)

        # np.asscalar is deprecated and removed from recent NumPy releases;
        # float()/int() convert NumPy scalars and plain numbers alike.
        cfg.SOLVER.MOMENTUM = float(config['momentum'])
        cfg.SOLVER.BASE_LR = float(config['lr'])
        cfg.SOLVER.WARMRESTART_PERIOD = int(config['restart_period'])

        trainer = get_trainer(cfg.TRAINER)(cfg)
        trainer.train()
        reporter(mean_accuracy=trainer.acc)

    sched = AsyncHyperBandScheduler(time_attr="training_iteration",
                                    metric="mean_accuracy")

    # (lower, upper) bounds for every searched parameter.
    space = {
        'lr': (10**-3, 1.0),
        'momentum': (0.1, 0.9),
        'restart_period': (10, 30),
    }
    algo = BayesOptSearch(space,
                          max_concurrent=4,
                          metric="mean_accuracy",
                          mode="max",
                          utility_kwargs={
                              "kind": "ucb",
                              "kappa": 2.5,
                              "xi": 0.0
                          })

    analysis = tune.run(train_with_tune,
                        trial_name_creator=trial_str_creator,
                        name=cfg.EXPERIMENT,
                        scheduler=sched,
                        search_alg=algo,
                        resources_per_trial={
                            "cpu": 2,
                            "gpu": 1
                        },
                        num_samples=2)

    # State the optimisation direction explicitly instead of relying on the
    # library default, which differs between Ray releases.
    best_config = analysis.get_best_config(metric="mean_accuracy",
                                           mode="max")
    print(f'Best config is: {best_config}')
def testConvertBayesOpt(self):
    """Exercise BayesOptSearch's handling of Tune search-space definitions."""
    from ray.tune.suggest.bayesopt import BayesOptSearch

    # A space mixing categorical, quantized-integer and float domains must
    # be rejected by the converter.
    unsupported_space = {
        "a": tune.sample.Categorical([2, 3, 4]).uniform(),
        "b": {
            "x": tune.sample.Integer(0, 5).quantized(2),
            "y": 4,
            "z": tune.sample.Float(1e-4, 1e-2).loguniform()
        }
    }
    with self.assertRaises(ValueError):
        BayesOptSearch.convert_search_space(unsupported_space)

    # A converted Tune space and the equivalent native bounds must produce
    # the same first suggestion, strictly inside the bounds.
    tune_space = {"b": {"z": tune.sample.Float(1e-4, 1e-2).loguniform()}}
    native_space = {"b/z": (1e-4, 1e-2)}
    converted_space = BayesOptSearch.convert_search_space(tune_space)

    from_tune = BayesOptSearch(space=converted_space, metric="none")
    from_native = BayesOptSearch(space=native_space, metric="none")

    first = from_tune.suggest("0")
    second = from_native.suggest("0")

    self.assertEqual(first, second)
    suggested_z = first["b"]["z"]
    self.assertGreater(suggested_z, 1e-4)
    self.assertLess(suggested_z, 1e-2)

    # Keys containing "/" are invalid, at the top level and when nested.
    searcher = BayesOptSearch()
    bad_spaces = (
        {"a/b": tune.uniform(4.0, 8.0)},
        {"a": {"b/c": tune.uniform(4.0, 8.0)}},
    )
    for bad_space in bad_spaces:
        with self.assertRaises(ValueError):
            searcher.set_search_properties("none", "max", bad_space)

    # The search space can also be supplied through tune.run's config.
    searcher = BayesOptSearch(metric="b", mode="max")
    analysis = tune.run(_mock_objective,
                        config=tune_space,
                        search_alg=searcher,
                        num_samples=1)
    first_trial = analysis.trials[0]
    self.assertLess(first_trial.config["b"]["z"], 1e-2)

    # Tune domains and raw (lower, upper) tuples may be mixed in one space.
    mixed_space = {"a": tune.uniform(5, 6), "b": (8., 9.)}
    searcher = BayesOptSearch(space=mixed_space, metric="a", mode="max")
    suggestion = searcher.suggest("0")
    self.assertGreaterEqual(suggestion["a"], 5)
    self.assertLessEqual(suggestion["a"], 6)
    self.assertGreaterEqual(suggestion["b"], 8)
    self.assertLessEqual(suggestion["b"], 9)
help="Finish quickly for testing") args, _ = parser.parse_known_args() ray.init() space = {"width": (0, 20), "height": (-100, 100)} config = { "num_samples": 10 if args.smoke_test else 1000, "config": { "iterations": 100, }, "stop": { "timesteps_total": 100 } } algo = BayesOptSearch(space, max_concurrent=4, metric="mean_loss", mode="min", utility_kwargs={ "kind": "ucb", "kappa": 2.5, "xi": 0.0 }) scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") run(easy_objective, name="my_exp", search_alg=algo, scheduler=scheduler, **config)