def basicSetup(self,
               resample_prob=0.0,
               explore=None,
               perturbation_interval=10,
               log_config=False,
               step_once=True):
    """Create a PBT scheduler plus a mock runner holding five running trials.

    Args:
        resample_prob: Passed to the scheduler as ``resample_probability``.
        explore: Passed to the scheduler as ``custom_explore_fn``.
        perturbation_interval: Passed through to the scheduler unchanged.
        log_config: Passed through to the scheduler unchanged.
        step_once: If true, feed ``result(10, 50 * i)`` for trial ``i`` to
            the scheduler and assert that it answers ``CONTINUE``.

    Returns:
        The ``(scheduler, runner)`` pair, after ``reset_stats()`` has been
        called on the scheduler.
    """
    mutations = {
        "id_factor": [100],
        "float_factor": lambda: 100.0,
        "int_factor": lambda: 10,
    }
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=perturbation_interval,
        resample_probability=resample_prob,
        quantile_fraction=0.25,
        hyperparam_mutations=mutations,
        custom_explore_fn=explore,
        log_config=log_config)
    runner = _MockTrialRunner(pbt)

    for idx in range(5):
        trial_config = {
            "id_factor": idx,
            "float_factor": 2.0,
            "const_factor": 3,
            "int_factor": 10,
        }
        trial = _MockTrial(idx, trial_config)
        runner.add_trial(trial)
        trial.status = Trial.RUNNING
        if not step_once:
            continue
        decision = pbt.on_trial_result(runner, trial, result(10, 50 * idx))
        self.assertEqual(decision, TrialScheduler.CONTINUE)

    pbt.reset_stats()
    return pbt, runner
def basicSetup(self, resample_prob=0.0, explore=None):
    """Build a PBT scheduler and a mock runner seeded with five trials.

    Each trial ``i`` is added to the runner, marked RUNNING, and reported
    once with ``result(10, 50 * i)``; the scheduler must answer CONTINUE.

    Args:
        resample_prob: Forwarded as ``resample_probability``.
        explore: Forwarded as ``custom_explore_fn``.

    Returns:
        Tuple ``(pbt, runner)`` with the scheduler stats reset.
    """
    scheduler_options = dict(
        time_attr="training_iteration",
        perturbation_interval=10,
        resample_probability=resample_prob,
        hyperparam_mutations={
            "id_factor": [100],
            "float_factor": lambda: 100.0,
            "int_factor": lambda: 10,
        },
        custom_explore_fn=explore,
    )
    pbt = PopulationBasedTraining(**scheduler_options)
    runner = _MockTrialRunner(pbt)

    for trial_id in range(5):
        trial = _MockTrial(
            trial_id,
            {
                "id_factor": trial_id,
                "float_factor": 2.0,
                "const_factor": 3,
                "int_factor": 10,
            })
        runner.add_trial(trial)
        trial.status = Trial.RUNNING
        outcome = pbt.on_trial_result(runner, trial,
                                      result(10, 50 * trial_id))
        self.assertEqual(outcome, TrialScheduler.CONTINUE)

    pbt.reset_stats()
    return pbt, runner
def run(args, parser):
    """Load experiments from a YAML file and run them under a PBT scheduler.

    Args:
        args: Parsed command-line namespace. Reads ``config_file``,
            ``restore`` (when the attribute exists), ``redis_address``,
            ``ray_num_cpus``, ``ray_num_gpus`` and ``queue_trials``.
        parser: Not used by this function; kept for interface compatibility.

    Raises:
        ValueError: If ``args.config_file`` is empty. Previously this case
            surfaced only later as an unbound ``experiments`` variable,
            after Ray had already been initialised.
    """
    if not args.config_file:
        raise ValueError(
            "A config file describing the experiments is required.")

    with open(args.config_file) as f:
        # ``yaml.load`` without an explicit Loader is rejected by recent
        # PyYAML releases; ``safe_load`` works on every version and only
        # builds plain Python data.
        experiments = yaml.safe_load(f)

    if hasattr(args, 'restore'):
        # The restore path is attached to the first experiment only.
        key = list(experiments.keys())[0]
        experiments[key]['restore'] = args.restore

    ray.init(
        redis_address=args.redis_address,
        num_cpus=args.ray_num_cpus,
        num_gpus=args.ray_num_gpus)

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="cap_mean",
        perturbation_interval=5,
        hyperparam_mutations={
            # Callables act as resampling distributions.
            'gamma': lambda: random.uniform(0.9999, 0.90),
            'lr': lambda: random.uniform(0.00001, 1),
            "entropy_coeff": lambda: random.uniform(0, 0.1),
            "clip_param": lambda: random.uniform(0.0, 0.3),
            # Allow perturbations within this set of categorical values.
            'num_sgd_iter': [10, 20, 30],
            'sgd_minibatch_size': [2048, 40960, 10240, 20480],
        })

    run_experiments(
        experiments, scheduler=pbt, queue_trials=args.queue_trials)
def _get_search_algorithm(self, search_algorithm, config_space, metric,
                          mode, max_concurrent):
    """Return the ``(search algorithm, scheduler)`` pair for a strategy name.

    Args:
        search_algorithm: One of ``"BO"``, ``"BOHB"``, ``"PBT"``, ``"GRID"``
            or ``"RANDOM"``.
        config_space: Search space; handed to ``TuneBOHB`` for BOHB and used
            as ``hyperparam_mutations`` for PBT.
        metric: Metric name; only forwarded to ``TuneBOHB``.
        mode: Optimisation mode; only forwarded to ``TuneBOHB``.
        max_concurrent: Concurrency limit for the BO and BOHB searchers.

    Returns:
        Tuple ``(algo, scheduler)``. Either element may be ``None``; both
        are ``None`` for ``"GRID"`` and ``"RANDOM"``.

    Raises:
        ValueError: If ``search_algorithm`` is not one of the names above.
    """
    if search_algorithm == "BO":
        algo = BayesOptSearch(
            utility_kwargs={
                "kind": "ucb",
                "kappa": 2.5,
                "xi": 0.0
            })
        algo = ConcurrencyLimiter(algo, max_concurrent=max_concurrent)
        scheduler = AsyncHyperBandScheduler()
    elif search_algorithm == "BOHB":
        experiment_metrics = dict(metric=metric, mode=mode)
        algo = TuneBOHB(
            config_space, max_concurrent=max_concurrent, **experiment_metrics)
        scheduler = HyperBandForBOHB(
            time_attr="training_iteration", reduction_factor=4)
    elif search_algorithm == "PBT":
        # Problem of PBT: it mutates the param value, so sometimes it
        # generates unacceptable values.
        algo = None
        scheduler = PopulationBasedTraining(
            time_attr='training_iteration',
            # Every N time_attr units, "perturb" the parameters.
            perturbation_interval=2,
            hyperparam_mutations=config_space)
    elif search_algorithm in ("GRID", "RANDOM"):
        algo = None
        scheduler = None
    else:
        # ValueError is still an Exception, so existing handlers keep
        # working, but the message is now a single readable string.
        raise ValueError(
            "{} is not available yet".format(search_algorithm))
    return algo, scheduler
def run_tune_pbt():
    """Run ``pbt_function`` under PBT and print the best configuration."""
    mutations = {
        # distribution for resampling
        "lr": lambda: random.uniform(0.0001, 0.02),
        # allow perturbations within this set of categorical values
        "some_other_factor": [1, 2],
    }
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=4,
        hyperparam_mutations=mutations,
    )

    start_config = {
        "lr": 0.0001,
        # note: this parameter is perturbed but has no effect on
        # the model training in this example
        "some_other_factor": 1,
    }
    stop_criteria = {"training_iteration": 30}

    analysis = tune.run(
        pbt_function,
        name="pbt_test",
        scheduler=scheduler,
        verbose=False,
        metric="mean_accuracy",
        mode="max",
        stop=stop_criteria,
        num_samples=8,
        fail_fast=True,
        config=start_config,
    )

    print("Best hyperparameters found were: ", analysis.best_config)
def main():
    """Parse the CLI, set up Ray Tune with a PBT scheduler, and run trials.

    Reads the ``ip_head`` and ``redis_password`` environment variables to
    connect to Ray, and prints the best config by ``val_forces_mae``.
    """
    # parse config
    parser = flags.get_parser()
    args, override_args = parser.parse_known_args()
    config = build_config(args, override_args)

    # add parameters to tune using grid or random search
    config["lr"] = tune.loguniform(0.0001, 0.01)

    # define scheduler
    lr_mutation = {"lr": tune.loguniform(0.000001, 0.01)}
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="val_loss",
        mode="min",
        perturbation_interval=1,
        hyperparam_mutations=lr_mutation,
    )

    # ray init
    head_ip = os.environ["ip_head"].split(":")[0]
    redis_password = os.environ["redis_password"]
    ray.init(
        address="auto",
        _node_ip_address=head_ip,
        _redis_password=redis_password,
    )

    # define command line reporter
    shown_metrics = (
        "act_lr",
        "steps",
        "epochs",
        "training_iteration",
        "val_loss",
        "val_forces_mae",
    )
    reporter = CLIReporter(
        print_intermediate_tables=True,
        metric="val_loss",
        mode="min",
        metric_columns={column: column for column in shown_metrics},
    )

    # define run parameters
    trial_resources = {"cpu": 8, "gpu": 1}
    analysis = tune.run(
        ocp_trainable,
        resources_per_trial=trial_resources,
        config=config,
        stop={"epochs": 12},
        # time_budget_s=28200,
        fail_fast=False,
        local_dir=config.get("run_dir", "./"),
        num_samples=8,
        progress_reporter=reporter,
        scheduler=scheduler,
    )

    best = analysis.get_best_config(
        metric="val_forces_mae", mode="min", scope="last")
    print("Best config is:", best)
def resolve_early_stopping(early_stopping, max_iters, metric_name):
    """Turn an ``early_stopping`` setting into a scheduler instance.

    Args:
        early_stopping: Either the name of a scheduler listed in
            ``TuneBaseSearchCV.defined_schedulers`` or a ``TrialScheduler``
            instance.
        max_iters: Used as ``max_t`` for the HyperBand-family schedulers.
        metric_name: Metric the scheduler should track; the mode is
            always ``"max"``.

    Returns:
        A new scheduler for a recognised name, or the given scheduler
        instance with its ``_metric`` and ``_mode`` attributes overwritten.

    Raises:
        ValueError: For a string that does not resolve to a scheduler.
        TypeError: For any other unsupported type.
    """
    if not isinstance(early_stopping, str):
        if isinstance(early_stopping, TrialScheduler):
            early_stopping._metric = metric_name
            early_stopping._mode = "max"
            return early_stopping
        raise TypeError("`early_stopping` must be a str, boolean, "
                        f"or tune scheduler. Got {type(early_stopping)}.")

    base_kwargs = dict(metric=metric_name, mode="max")
    timed_kwargs = dict(base_kwargs, max_t=max_iters)
    # Factories are lazy so only the selected scheduler class is touched.
    factories = {
        "PopulationBasedTraining":
            lambda: PopulationBasedTraining(**base_kwargs),
        "AsyncHyperBandScheduler":
            lambda: AsyncHyperBandScheduler(**timed_kwargs),
        "HyperBandScheduler":
            lambda: HyperBandScheduler(**timed_kwargs),
        "MedianStoppingRule":
            lambda: MedianStoppingRule(**base_kwargs),
        "ASHAScheduler":
            lambda: ASHAScheduler(**timed_kwargs),
    }

    if early_stopping in TuneBaseSearchCV.defined_schedulers:
        make_scheduler = factories.get(early_stopping)
        if make_scheduler is not None:
            return make_scheduler()

    raise ValueError(
        "{} is not a defined scheduler. "
        "Check the list of available schedulers.".format(early_stopping))
def testPermutationContinuationFunc(self):
    """Run ``MockTrainingFunc`` under PBT with seeded, fixed start values."""
    pbt_kwargs = dict(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        log_config=True,
        hyperparam_mutations={"c": lambda: 1},
    )
    scheduler = PopulationBasedTraining(**pbt_kwargs)

    param_a = MockParam([10, 20, 30, 40])
    param_b = MockParam([1.2, 0.9, 1.1, 0.8])

    # Seed both RNGs so the run is deterministic.
    random.seed(100)
    np.random.seed(1000)

    search_config = {
        "a": tune.sample_from(lambda _: param_a()),
        "b": tune.sample_from(lambda _: param_b()),
        "c": 1,
    }
    tune.run(
        MockTrainingFunc,
        config=search_config,
        fail_fast=True,
        num_samples=4,
        keep_checkpoints_num=1,
        checkpoint_score_attr="min-training_iteration",
        scheduler=scheduler,
        name="testPermutationContinuationFunc",
        stop={"training_iteration": 3})
def synchSetup(self, synch, param=None):
    """Run the sync mock trainable under PBT and return the analysis.

    Args:
        synch: Forwarded to the scheduler's ``synch`` argument.
        param: Values handed to ``MockParam`` for config key ``"a"``;
            defaults to ``[10, 20, 30]``.

    Returns:
        The object returned by ``tune.run``.
    """
    values = [10, 20, 30] if param is None else param

    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        log_config=True,
        hyperparam_mutations={"c": lambda: 1},
        synch=synch)

    param_a = MockParam(values)

    # Seed both RNGs so the run is deterministic.
    random.seed(100)
    np.random.seed(100)

    trial_config = {
        "a": tune.sample_from(lambda _: param_a()),
        "c": 1,
    }
    return tune.run(
        self.MockTrainingFuncSync,
        config=trial_config,
        fail_fast=True,
        num_samples=3,
        scheduler=scheduler,
        name="testPBTSync",
        stop={"training_iteration": 3},
    )
def set_algorithm(experiment_name, config):
    """Configure the search algorithm selected by ``args.algorithm``.

    Reads the module-level ``args`` (``algorithm``, ``seed``,
    ``perturbation``, ``jobs``) and ``best_params``.

    Args:
        experiment_name: Name given to the Ax experiment (``'ax'`` only).
        config: Search space; used as the Ax ``parameters`` and as the PBT
            ``hyperparam_mutations``.

    Returns:
        The configured algorithm. Everything except ``'random'`` and
        ``'pbt'`` is wrapped in a ``ConcurrencyLimiter`` capped at
        ``args.jobs``.

    Raises:
        ValueError: If ``args.algorithm`` is not a supported name.
            Previously this fell through to an unbound local variable.
    """
    name = args.algorithm
    if name == 'hyperopt':
        algorithm = HyperOptSearch(points_to_evaluate=best_params)
    elif name == 'ax':
        ax_client = AxClient(enforce_sequential_optimization=False)
        ax_client.create_experiment(name=experiment_name,
                                    parameters=config,
                                    objective_name="minimum",
                                    minimize=True)
        algorithm = AxSearch(ax_client=ax_client,
                             points_to_evaluate=best_params)
    elif name == 'nevergrad':
        algorithm = NevergradSearch(
            points_to_evaluate=best_params,
            optimizer=ng.optimizers.registry["PortfolioDiscreteOnePlusOne"])
    elif name == 'optuna':
        algorithm = OptunaSearch(points_to_evaluate=best_params,
                                 seed=args.seed)
    elif name == 'pbt':
        algorithm = PopulationBasedTraining(
            time_attr="training_iteration",
            perturbation_interval=args.perturbation,
            hyperparam_mutations=config,
            synch=True)
    elif name == 'random':
        algorithm = BasicVariantGenerator(max_concurrent=args.jobs)
    else:
        raise ValueError(
            "Unsupported search algorithm: {!r}".format(name))

    # 'random' limits concurrency itself and 'pbt' is not wrapped.
    if name not in ['random', 'pbt']:
        algorithm = ConcurrencyLimiter(algorithm, max_concurrent=args.jobs)
    return algorithm
def run_pbt(args):
    """Run the experiments from ``args.config`` under a PBT scheduler.

    Every experiment in the YAML file has its ``"env"`` setting replaced by
    the module-level ``ENVIRONMENT`` before being launched.

    Args:
        args: Parsed command-line namespace; only ``args.config`` (path to
            the experiments YAML file) is read.
    """
    pbt_scheduler = PopulationBasedTraining(
        time_attr='time_total_s',
        reward_attr='episode_reward_mean',
        perturbation_interval=4 * 3600.0,
        hyperparam_mutations={
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "tau": [0.005, 0.001],
            "target_noise": [0.01, 0.1, 0.2],
            "noise_scale": [0.01, 0.1, 0.2],
            "train_batch_size": [2048, 4096, 8192],
            "l2_reg": [1e-5, 1e-6, 1e-7],
        })

    # Prepare the default settings
    with open(args.config, 'r') as stream:
        # ``yaml.load`` without an explicit Loader is rejected by recent
        # PyYAML releases; ``safe_load`` works on every version.
        experiments = yaml.safe_load(stream)

    for settings in experiments.values():
        settings["env"] = ENVIRONMENT

    run_experiments(experiments, scheduler=pbt_scheduler)
def testNoConfig(self):
    """Run PBT with mutations only, passing no ``config`` to ``tune.run``."""

    def MockTrainingFunc(config):
        # The reported value is constant for a given config.
        product = config["a"] * config["b"]
        nested = config["c"]
        total = nested["c1"] + nested["c2"]
        while True:
            tune.report(mean_accuracy=product * total)

    mutations = {
        "a": tune.uniform(0, 0.3),
        "b": [1, 2, 3],
        "c": {
            "c1": lambda: np.random.uniform(0.5),
            "c2": tune.choice([2, 3, 4]),
        },
    }
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        hyperparam_mutations=mutations,
    )

    tune.run(
        MockTrainingFunc,
        fail_fast=True,
        num_samples=4,
        scheduler=scheduler,
        name="testNoConfig",
        stop={"training_iteration": 3})
def testPermutationContinuation(self):
    """Check that runs continue correctly after a PBT permutation.

    Sometimes, runs were continued from deleted checkpoints. The
    deterministic initialisation used here would fail when the fix was not
    applied. See issues #9036, #9036
    """

    class MockTrainable(tune.Trainable):
        """Trainable whose score is ``(a - iter) * b``."""

        def setup(self, config):
            self.iter = 0
            self.a, self.b, self.c = config["a"], config["b"], config["c"]

        def step(self):
            self.iter += 1
            score = (self.a - self.iter) * self.b
            return {"mean_accuracy": score}

        def save_checkpoint(self, tmp_checkpoint_dir):
            # ``c`` is not part of the pickled state.
            state = (self.a, self.b, self.iter)
            target = os.path.join(tmp_checkpoint_dir, "model.mock")
            with open(target, "wb") as fp:
                pickle.dump(state, fp)
            return tmp_checkpoint_dir

        def load_checkpoint(self, tmp_checkpoint_dir):
            source = os.path.join(tmp_checkpoint_dir, "model.mock")
            with open(source, "rb") as fp:
                state = pickle.load(fp)
            self.a, self.b, self.iter = state

    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        log_config=True,
        hyperparam_mutations={"c": lambda: 1})

    param_a = MockParam([10, 20, 30, 40])
    param_b = MockParam([1.2, 0.9, 1.1, 0.8])

    # Seed both RNGs so the run is deterministic.
    random.seed(100)
    np.random.seed(1000)

    start_config = {
        "a": tune.sample_from(lambda _: param_a()),
        "b": tune.sample_from(lambda _: param_b()),
        "c": 1,
    }
    tune.run(
        MockTrainable,
        config=start_config,
        fail_fast=True,
        num_samples=4,
        checkpoint_freq=1,
        checkpoint_at_end=True,
        keep_checkpoints_num=1,
        checkpoint_score_attr="min-training_iteration",
        scheduler=scheduler,
        name="testPermutationContinuation",
        stop={"training_iteration": 3})
def tune_mnist_pbt():
    """Tune the Lightning MNIST classifier with PBT.

    The dataset is downloaded into a fresh temporary directory, which is
    removed when the function exits, including when downloading or tuning
    raises.
    """
    data_dir = mkdtemp(prefix="mnist_data_")
    try:
        LightningMNISTClassifier.download_data(data_dir)

        config = {
            "layer_1_size": tune.choice([32, 64, 128]),
            "layer_2_size": tune.choice([64, 128, 256]),
            "lr": 1e-3,
            "batch_size": 64,
            "data_dir": data_dir
        }

        scheduler = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="loss",
            mode="min",
            perturbation_interval=4,
            hyperparam_mutations={
                "lr": lambda: tune.loguniform(1e-4, 1e-1).func(None),
                "batch_size": [32, 64, 128]
            })

        reporter = CLIReporter(
            parameter_columns=[
                "layer_1_size", "layer_2_size", "lr", "batch_size"
            ],
            metric_columns=["loss", "mean_accuracy", "training_iteration"])

        tune.run(
            train_mnist_tune_checkpoint,
            resources_per_trial={"cpu": 1},
            config=config,
            num_samples=10,
            scheduler=scheduler,
            progress_reporter=reporter)
    finally:
        # Always clean up so a failed run does not leak the temp directory.
        shutil.rmtree(data_dir)
def set_tuning_parameters(agent, config):
    """Add PBT sampling to ``config`` and build the matching scheduler.

    Only the ``"ppo"`` agent (case-insensitive) is tuned; ``"ddpg"``,
    ``"pg"`` and any other agent leave ``config`` untouched.

    Args:
        agent: Agent name.
        config: Trial config dict; mutated in place for PPO.

    Returns:
        Tuple ``(config, scheduler)``. ``scheduler`` is ``None`` unless the
        agent is PPO.
    """
    scheduler = None
    if agent.lower() == "ppo":

        def explore(config):
            """Post-process a perturbed config so it stays valid."""
            # Keep the train batch at 2000 or more.
            config["train_batch_size"] = max(config["train_batch_size"],
                                             2000)
            if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
                config["train_batch_size"] = config["sgd_minibatch_size"] * 2
            # ensure we run at least one sgd iter
            if config["num_sgd_iter"] < 1:
                config["num_sgd_iter"] = 1
            if config['horizon'] < 32:
                config['horizon'] = 32
            for k, v in config.items():
                if k == 'use_gae':
                    continue  # that one is fine and also non numeric
                try:
                    is_negative = v < 0.0
                except TypeError:
                    # Values that cannot be ordered against a float
                    # (strings, dicts, ...) are left alone instead of
                    # crashing the perturbation step.
                    continue
                if is_negative:
                    # lazy way to make sure things are at worst 0
                    config[k] = 0.0
            return config

        # Mutation distributions. They must be callables so each call
        # draws a fresh value; plain numbers would be sampled only once,
        # when this dict is built.
        hyper_params = {
            # update frequency
            "horizon": lambda: random.randint(10000, 50000),
            "sgd_minibatch_size": lambda: random.randint(128, 16384),
            "train_batch_size": lambda: random.randint(2000, 160000),
            "num_sgd_iter": lambda: random.randint(3, 30),
            # Objective hyperparams:
            # "clip_param": random.uniform(0.01, 0.5),
            # "kl_target": random.uniform(0.003, 0.03),
            # "kl_coeff": random.uniform(0.3, 1),
            # "use_gae": random.choice([True, False]),
            # "gamma": random.choice([0.99,
            #                         random.uniform(0.8, 0.9997),
            #                         random.uniform(0.8, 0.9997)]),
            # "lambda": random.uniform(0.9, 1.0),
            # val fn & entropy coeff
            # "vf_loss_coeff": random.choice([0.5, 1.0]),
            # "entropy_coeff": random.uniform(0, 0.01),
            # "lr": random.uniform(5e-6, 0.003),
        }

        # creates a wide range of the potential population
        for k, sampler in hyper_params.items():
            # Bind the sampler as a default argument; a plain closure would
            # make every entry use the last key of the loop.
            config[k] = tune.sample_from(
                lambda spec, sampler=sampler: sampler())

        scheduler = PopulationBasedTraining(
            time_attr="time_total_s",
            reward_attr="episode_reward_mean",
            perturbation_interval=120,
            resample_probability=0.80,
            hyperparam_mutations=hyper_params,
            custom_explore_fn=explore)

    if agent.lower() == "ddpg":
        pass

    if agent.lower() == "pg":
        pass

    return config, scheduler
def set_tuning_parameters(agent, config):
    """Add PBT sampling to ``config`` and build the matching scheduler.

    Only the ``"ppo"`` agent (case-insensitive) is tuned; ``"ddpg"``,
    ``"pg"`` and any other agent leave ``config`` untouched.

    Args:
        agent: Agent name.
        config: Trial config dict; mutated in place for PPO.

    Returns:
        Tuple ``(config, scheduler)``. ``scheduler`` is ``None`` unless the
        agent is PPO.
    """
    scheduler = None
    if agent.lower() == "ppo":

        # Postprocess the perturbed config to ensure it's still valid
        def explore(config):
            # ensure we collect enough timesteps to do sgd
            # (should be 4 at minimum)
            config["train_batch_size"] = max(config["train_batch_size"], 4)
            if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
                config["train_batch_size"] = config["sgd_minibatch_size"] * 2
            # ensure we run at least one sgd iter
            if config["num_sgd_iter"] < 1:
                config["num_sgd_iter"] = 1
            if config['horizon'] < 32:
                config['horizon'] = 32
            for k, v in config.items():
                if k == 'use_gae':
                    continue  # that one is fine and also non numeric
                try:
                    is_negative = v < 0.0
                except TypeError:
                    # Values that cannot be ordered against a float
                    # (strings, dicts, ...) are left alone instead of
                    # crashing the perturbation step.
                    continue
                if is_negative:
                    # lazy way to make sure things are at worst 0
                    config[k] = 0.0
            return config

        # mutation distributions
        hyper_params = {
            # update frequency
            "horizon": lambda: random.randint(32, 5000),
            "train_batch_size": lambda: random.randint(4, 4096),
            "num_sgd_iter": lambda: random.randint(3, 30),
            # Objective hyperparams:
            'clip_param': lambda: random.choice([0.1, 0.2, 0.3]),
            'kl_target': lambda: random.uniform(0.003, 0.03),
            'kl_coeff': lambda: random.uniform(0.3, 1),
            'use_gae': lambda: random.choice([True, False]),
            'gamma': lambda: random.choice([
                0.99,
                random.uniform(0.8, 0.9997),
                random.uniform(0.8, 0.9997)
            ]),
            'lambda': lambda: random.uniform(0.9, 1.0),
            # val fn & entropy coeff
            'vf_loss_coeff': lambda: random.choice([0.5, 1.0]),
            'entropy_coeff': lambda: random.uniform(0, 0.01),
            'sgd_stepsize': lambda: random.uniform(5e-6, 0.003),
        }

        # creates a wide range of the potential population
        for k, sampler in hyper_params.items():
            # Call the sampler so the config receives a value rather than
            # the sampler function itself, and bind it as a default
            # argument so each key keeps its own distribution.
            config[k] = tune.sample_from(
                lambda spec, sampler=sampler: sampler())

        scheduler = PopulationBasedTraining(
            time_attr="time_total_s",
            reward_attr="episode_reward_mean",
            perturbation_interval=120,
            resample_probability=0.80,
            hyperparam_mutations=hyper_params,
            custom_explore_fn=explore)

    if agent.lower() == "ddpg":
        pass

    if agent.lower() == "pg":
        pass

    return config, scheduler
def tune_vl_bert(config_path,
                 pl_ckpt_path,
                 num_samples=10,
                 num_epochs=10,
                 gpus_per_trial=2):
    """Tune VL-BERT hyperparameters with a PBT scheduler.

    Args:
        config_path: Passed to ``update_config``; the module-level
            ``config`` is then deep-copied as the base model config.
        pl_ckpt_path: Checkpoint path forwarded to ``_tune``.
        num_samples: Number of trials.
        num_epochs: Only referenced by the disabled ASHA scheduler below.
        gpus_per_trial: GPUs reserved per trial and forwarded to ``_tune``.
    """
    # scheduler = ASHAScheduler(
    #     metric="loss",
    #     mode="min",
    #     max_t=num_epochs,
    #     grace_period=1,
    #     reduction_factor=2)

    reporter = CLIReporter(
        parameter_columns=[
            "lr", "weight_decay", "warmup_factor", "max_epoch", "batch_size"
        ],
        metric_columns=["mean_accuracy", "training_iteration"])

    param_config = {
        "lr": 6.25e-7,
        "weight_decay": tune.loguniform(1e-5, 1e-2),
        "batch_size": 4,
        "max_epoch": tune.choice([4, 6, 8, 10]),
        "warmup_factor": tune.uniform(0, 1),
        "warmup_steps": tune.uniform(100, 800),
    }

    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=2,
        hyperparam_mutations={
            # Bounds are (lower, upper); they were previously passed in
            # reverse order.
            "lr": tune.loguniform(6.25e-8, 6.25e-6),
            "batch_size": [1, 2, 3, 4],
        })

    update_config(config_path)
    model_base_cfg = copy.deepcopy(config)

    tune.run(
        partial(
            _tune,
            vl_bert_config=model_base_cfg,
            pl_ckpt_path=pl_ckpt_path,
            num_gpus=gpus_per_trial,
        ),
        resources_per_trial={
            "cpu": 4,
            "gpu": gpus_per_trial,
        },
        config=param_config,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
        name="tune_vl_bert")
def testMemoryCheckpointFree(self):
    """Run PBT with large checkpoints and assert object memory stays bounded.

    The bound is asserted inside the custom executor after every save.
    """

    class MyTrainable(Trainable):
        def setup(self, config):
            self.iter = 0
            self.a = config["a"]
            # Make sure this is large enough so ray uses object store
            # instead of in-process store.
            self.large_object = random.getrandbits(int(10e6))

        def step(self):
            self.iter += 1
            return {"metric": self.iter + self.a}

        def save_checkpoint(self, checkpoint_dir):
            file_path = os.path.join(checkpoint_dir, "model.mock")
            payload = (self.large_object, self.iter, self.a)
            with open(file_path, "wb") as fp:
                pickle.dump(payload, fp)
            return file_path

        def load_checkpoint(self, path):
            with open(path, "rb") as fp:
                payload = pickle.load(fp)
            self.large_object, self.iter, self.a = payload

    class CustomExecutor(RayTrialExecutor):
        def save(self, *args, **kwargs):
            saved = super().save(*args, **kwargs)
            assert object_memory_usage() <= (12 * 80e6)
            return saved

    param_a = MockParam([1, -1])

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="metric",
        mode="max",
        perturbation_interval=1,
        hyperparam_mutations={"b": [-1]},
    )

    executor = CustomExecutor(queue_trials=False, reuse_actors=False)
    tune.run(
        MyTrainable,
        name="ray_demo",
        scheduler=pbt,
        stop={"training_iteration": 10},
        num_samples=3,
        checkpoint_freq=1,
        fail_fast=True,
        config={"a": tune.sample_from(lambda _: param_a())},
        trial_executor=executor,
    )
def run_pbt(args):
    """Run population based training for every experiment in ``args.config``.

    Each experiment's config gets randomly sampled start values, is then
    extended with the callbacks, the ``SCENARIO`` env config and the
    command-line overrides, and is finally launched with ``ray.tune.run``.
    """
    learning_rate_mutations = {
        "actor_learning_rate": log_uniform(1e-3, 1e-5),
        "critic_learning_rate": log_uniform(1e-3, 1e-5),
        "entropy_learning_rate": log_uniform(1e-3, 1e-5),
    }
    pbt_scheduler = PopulationBasedTraining(
        time_attr='time_total_s',
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=600.0,
        hyperparam_mutations={
            "tau": lambda: random.uniform(0.001, 0.005),
            "optimization": learning_rate_mutations,
        })

    with open(args.config, 'r') as stream:
        experiments = yaml.load(stream, Loader=yaml.Loader)

    for experiment_name, settings in experiments.items():
        print("Running %s"%experiment_name)
        config = settings['config']

        sampled_start_values = {
            "learning_starts": sample_from(
                lambda spec: random.choice([10000, 20000])),
            "target_network_update_freq": sample_from(
                lambda spec: random.choice([0, 10, 100])),
            "buffer_size": sample_from(
                lambda spec: int(random.choice([1e6, 2e6, 4e6, 8e6]))),
            "sample_batch_size": sample_from(
                lambda spec: int(random.choice([1, 4, 8]))),
            "train_batch_size": sample_from(
                lambda spec: int(random.choice([128, 256, 512]))),
        }
        config.update(sampled_start_values)

        # Hard overrides from this file and the commandline
        overrides = (
            get_callbacks(),
            dict(env_config=SCENARIO),
            config_from_args(args),
        )
        for override in overrides:
            config = extend_config(config, override)

        ray.tune.run(
            settings['run'],
            name=experiment_name,
            scheduler=pbt_scheduler,
            restore=SCENARIO["checkpoint"],
            config=config,
            checkpoint_freq=20,
            max_failures=5,
            num_samples=6)
def tune_example(num_workers=1, use_gpu=False, use_fp16=False,
                 test_mode=False):
    """Tune a ResNet18 ``TorchTrainer`` with PBT.

    Args:
        num_workers: Workers per trainer; also scales the batch size
            (``128 * num_workers``).
        use_gpu: Forwarded to ``TorchTrainer.as_trainable``.
        use_fp16: Forwarded to ``TorchTrainer.as_trainable``.
        test_mode: Stops after 2 iterations instead of 100 and is passed
            to the trainer config.

    Returns:
        The best config by minimum ``val_loss``.
    """
    trainer_config = {
        "test_mode": test_mode,  # user-defined param to subset the data
        BATCH_SIZE: 128 * num_workers,
    }
    TorchTrainable = TorchTrainer.as_trainable(
        model_creator=ResNet18,
        data_creator=cifar_creator,
        optimizer_creator=optimizer_creator,
        loss_creator=nn.CrossEntropyLoss,
        scheduler_creator=scheduler_creator,
        initialization_hook=initialization_hook,
        num_workers=num_workers,
        config=trainer_config,
        use_gpu=use_gpu,
        scheduler_step_freq="epoch",
        use_fp16=use_fp16)

    mutations = {
        # distribution for resampling
        "lr": lambda: np.random.uniform(0.001, 1),
        # allow perturbations within this set of categorical values
        "momentum": [0.8, 0.9, 0.99],
    }
    pbt_scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="val_loss",
        mode="min",
        perturbation_interval=1,
        hyperparam_mutations=mutations)

    reporter = CLIReporter()
    for metric_name, label in (("val_loss", "loss"), ("val_accuracy", "acc")):
        reporter.add_metric_column(metric_name, label)

    max_iterations = 2 if test_mode else 100
    analysis = tune.run(
        TorchTrainable,
        num_samples=4,
        config={
            "lr": tune.choice([0.001, 0.01, 0.1]),
            "momentum": 0.8,
        },
        stop={"training_iteration": max_iterations},
        max_failures=3,  # used for fault tolerance
        checkpoint_freq=3,  # used for fault tolerance
        keep_checkpoints_num=1,  # used for fault tolerance
        verbose=2,
        progress_reporter=reporter,
        scheduler=pbt_scheduler)

    return analysis.get_best_config(metric="val_loss", mode="min")
def get_raytune_schedule(raytune_cfg):
    """Build the trial scheduler selected by ``raytune_cfg["sched"]``.

    Recognised values are ``"asha"``, ``"hyperband"``, ``"bohb"``/``"BOHB"``,
    ``"pbt"``/``"PBT"`` and ``"pb2"``/``"PB2"``. The BOHB branch reads its
    settings from the ``"hyperband"`` section.

    Returns:
        The scheduler instance, or ``None`` (after printing a notice) when
        the value is not recognised.
    """
    choice = raytune_cfg["sched"]

    def shared_kwargs():
        # Looked up lazily so an unrecognised scheduler needs no other keys.
        return dict(
            metric=raytune_cfg["default_metric"],
            mode=raytune_cfg["default_mode"],
            time_attr="training_iteration",
        )

    if choice == "asha":
        common = shared_kwargs()
        section = raytune_cfg["asha"]
        return AsyncHyperBandScheduler(
            max_t=section["max_t"],
            grace_period=section["grace_period"],
            reduction_factor=section["reduction_factor"],
            brackets=section["brackets"],
            **common)

    if choice == "hyperband":
        common = shared_kwargs()
        section = raytune_cfg["hyperband"]
        return HyperBandScheduler(
            max_t=section["max_t"],
            reduction_factor=section["reduction_factor"],
            **common)

    # requires pip install hpbandster ConfigSpace
    if choice in ("bohb", "BOHB"):
        common = shared_kwargs()
        section = raytune_cfg["hyperband"]
        return HyperBandForBOHB(
            max_t=section["max_t"],
            reduction_factor=section["reduction_factor"],
            **common)

    if choice in ("pbt", "PBT"):
        common = shared_kwargs()
        section = raytune_cfg["pbt"]
        return PopulationBasedTraining(
            perturbation_interval=section["perturbation_interval"],
            hyperparam_mutations=section["hyperparam_mutations"],
            log_config=True,
            **common)

    # requires pip install GPy sklearn
    if choice in ("pb2", "PB2"):
        common = shared_kwargs()
        section = raytune_cfg["pb2"]
        return PB2(
            perturbation_interval=section["perturbation_interval"],
            hyperparam_bounds=section["hyperparam_bounds"],
            log_config=True,
            **common)

    print("INFO: Not using any Ray Tune trial scheduler.")
    return None
def create_pbt_scheduler(model):
    """
    Build a population-based training (PBT) scheduler for ``model``.

    The mutation space comes from ``create_hparam_tune_dict`` called with
    ``is_config=False``; the scheduler maximises ``episode_reward_mean``
    and perturbs every 10 training iterations.

    :param model: Forwarded to ``create_hparam_tune_dict``.
    :return: A new PBT scheduler.
    """
    mutation_space = create_hparam_tune_dict(model=model, is_config=False)
    return PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=10,
        metric="episode_reward_mean",
        mode="max",
        hyperparam_mutations=mutation_space,
    )
def tuning(args):
    """Search the three layer widths with PBT and print the best config.

    Reads ``actv``, ``batch_size``, ``num_epochs``, ``num_gpus``,
    ``num_cpus`` and ``num_trials`` from ``args``.
    """
    if args.actv == 'prelu':
        activation = nn.PReLU
    else:
        activation = nn.SELU

    config = {
        "l1_units": tune.choice([480, 512, 544]),
        "l2_units": tune.choice([224, 256, 288]),
        "l3_units": tune.choice([96, 128, 160]),
        "lambda": tune.choice([1e-3, 1e-4, 1e-5]),
        "actv": tune.choice([activation]),
    }

    width_mutations = {
        "l1_units": [464, 496, 528, 560, 576],
        "l2_units": [208, 240, 272, 304, 328],
        "l3_units": [80, 112, 144, 176, 208],
    }
    scheduler = PopulationBasedTraining(
        time_attr='training_iteration',
        perturbation_interval=4,
        hyperparam_mutations=width_mutations)

    reporter = CLIReporter(
        parameter_columns=["l1_units", "l2_units", "l3_units", "lambda"],
        metric_columns=["loss", "training_iteration"])

    trainable = tune.with_parameters(
        train,
        batch_size=args.batch_size,
        num_epochs=args.num_epochs,
        num_gpus=args.num_gpus)
    resources = {"cpu": args.num_cpus, "gpu": args.num_gpus}

    analysis = tune.run(
        trainable,
        resources_per_trial=resources,
        metric="loss",
        mode="min",
        config=config,
        num_samples=args.num_trials,
        scheduler=scheduler,
        progress_reporter=reporter,
        max_failures=3,
        stop={"training_iteration": 10},
        name="tune_cae")

    print(f"Found best hyperparameters: {analysis.best_config}")
def testPermutationContinuationFunc(self):
    """Function-API variant of the permutation-continuation test.

    The trainable writes a checkpoint on every step and, when given a
    checkpoint directory, resumes ``a``, ``b`` and the step counter from it.
    """

    def MockTrainingFunc(config, checkpoint_dir=None):
        step = 0
        a = config["a"]
        b = config["b"]

        if checkpoint_dir:
            restore_path = os.path.join(checkpoint_dir, "model.mock")
            with open(restore_path, "rb") as fp:
                a, b, step = pickle.load(fp)

        while True:
            step += 1
            with tune.checkpoint_dir(step=step) as step_dir:
                save_path = os.path.join(step_dir, "model.mock")
                with open(save_path, "wb") as fp:
                    pickle.dump((a, b, step), fp)
            tune.report(mean_accuracy=(a - step) * b)

    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        log_config=True,
        hyperparam_mutations={"c": lambda: 1},
    )

    param_a = MockParam([10, 20, 30, 40])
    param_b = MockParam([1.2, 0.9, 1.1, 0.8])

    # Seed both RNGs so the run is deterministic.
    random.seed(100)
    np.random.seed(1000)

    start_config = {
        "a": tune.sample_from(lambda _: param_a()),
        "b": tune.sample_from(lambda _: param_b()),
        "c": 1,
    }
    tune.run(
        MockTrainingFunc,
        config=start_config,
        fail_fast=True,
        num_samples=4,
        keep_checkpoints_num=1,
        checkpoint_score_attr="min-training_iteration",
        scheduler=scheduler,
        name="testPermutationContinuationFunc",
        stop={"training_iteration": 3},
    )
def setup_tune_scheduler():
    """Assemble the PBT scheduler and related ``tune.run`` arguments.

    Returns:
        Dict with the keys ``scheduler``, ``config``, ``num_samples`` and
        ``resources_per_trial``.
    """
    mutation_space, explore_fn = workload.create_sample_space()
    search_space = workload.create_search_space()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=5,
        hyperparam_mutations=mutation_space,
        custom_explore_fn=explore_fn,
        **workload.exp_metric())

    return {
        "scheduler": pbt,
        "config": search_space,
        # num_samples in PBT only sets population
        "num_samples": 10,
        "resources_per_trial": com.detect_baseline_resource(),
    }
def set_tuning_parameters(agent, config):
    """Add PBT sampling to ``config`` and build the matching scheduler.

    Only the ``"ppo"`` agent (case-insensitive) is tuned; ``"ddpg"``,
    ``"pg"`` and any other agent leave ``config`` untouched.

    Args:
        agent: Agent name.
        config: Trial config dict; mutated in place for PPO.

    Returns:
        Tuple ``(config, scheduler)``. ``scheduler`` is ``None`` unless the
        agent is PPO.
    """
    scheduler = None
    if agent.lower() == "ppo":

        # Postprocess the perturbed config to ensure it's still valid
        def explore(config):
            # ensure we collect enough timesteps to do sgd
            if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
                config["train_batch_size"] = config["sgd_minibatch_size"] * 2
            # ensure we run at least one sgd iter
            if config["num_sgd_iter"] < 1:
                config["num_sgd_iter"] = 1
            return config

        # optimization related parameters
        # hype_params["kl_coeff"] = lambda: random.uniform(.1, .8)
        # hype_params["entropy_coeff"] = lambda: random.uniform(0.0, 1.0)
        # hype_params["kl_target"] = lambda: random.uniform(0.0, 0.05)
        hype_params = {
            "lambda": lambda: random.uniform(0.9, 1.0),
            "clip_param": lambda: random.uniform(0.01, 0.5),
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "num_sgd_iter": lambda: random.randint(1, 30),
            "sgd_minibatch_size": lambda: random.randint(128, 16384),
            "train_batch_size": lambda: random.randint(2000, 160000),
        }

        # Initial population values. The first two assignments used to end
        # in a trailing comma, which stored a 1-tuple holding the sampler
        # instead of the sampler itself.
        config["num_sgd_iter"] = tune.sample_from(
            lambda spec: random.choice([10, 20, 30]))
        config["sgd_minibatch_size"] = tune.sample_from(
            lambda spec: random.choice([128, 512, 2048]))
        config["train_batch_size"] = tune.sample_from(
            lambda spec: random.choice([10000, 20000, 40000]))

        scheduler = PopulationBasedTraining(
            time_attr="time_total_s",
            reward_attr="episode_reward_mean",
            perturbation_interval=120,
            resample_probability=0.25,
            hyperparam_mutations=hype_params,
            custom_explore_fn=explore)

    if agent.lower() == "ddpg":
        pass

    if agent.lower() == "pg":
        pass

    return config, scheduler
def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
    """Tune the Lightning MNIST classifier with PBT and print the best config.

    The dataset is downloaded into ``<tempdir>/mnist_data_``; that directory
    is removed when the function exits, including when downloading or
    tuning raises.

    Args:
        num_samples: Number of trials.
        num_epochs: Forwarded to the training function.
        gpus_per_trial: GPUs reserved per trial and forwarded to the
            training function as ``num_gpus``.
    """
    data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")
    try:
        LightningMNISTClassifier.download_data(data_dir)

        config = {
            "layer_1_size": tune.choice([32, 64, 128]),
            "layer_2_size": tune.choice([64, 128, 256]),
            "lr": 1e-3,
            "batch_size": 64,
        }

        scheduler = PopulationBasedTraining(
            perturbation_interval=4,
            hyperparam_mutations={
                "lr": tune.loguniform(1e-4, 1e-1),
                "batch_size": [32, 64, 128]
            })

        reporter = CLIReporter(
            parameter_columns=[
                "layer_1_size", "layer_2_size", "lr", "batch_size"
            ],
            metric_columns=["loss", "mean_accuracy", "training_iteration"])

        analysis = tune.run(
            tune.with_parameters(
                train_mnist_tune_checkpoint,
                data_dir=data_dir,
                num_epochs=num_epochs,
                num_gpus=gpus_per_trial),
            resources_per_trial={
                "cpu": 1,
                "gpu": gpus_per_trial
            },
            metric="loss",
            mode="min",
            config=config,
            num_samples=num_samples,
            scheduler=scheduler,
            progress_reporter=reporter,
            name="tune_mnist_pbt")

        print("Best hyperparameters found were: ", analysis.best_config)
    finally:
        # Clean up even when the run fails; ignore_errors covers the case
        # where the download never created the directory.
        shutil.rmtree(data_dir, ignore_errors=True)
def run_experiment(args):
    """Run the DARTS architecture search under a PBT scheduler.

    Args:
        args: Parsed command-line namespace. Reads ``smoke_test``,
            ``ray_address`` and ``layers``; ``layers`` is overwritten with
            2 in smoke-test mode.
    """
    if args.smoke_test:
        args.layers = 2
    if args.ray_address:
        ray.init(address=args.ray_address)

    def explore_arch(c):
        """Perturb the architecture and keep every other config entry.

        The trial config built below has no ``'use_gpu'`` key, so the
        previous explore function (which read ``c['use_gpu']`` and returned
        only two keys) failed on lookup and would also have dropped
        ``'args'`` and ``'layers'``.
        """
        new_config = dict(c)
        new_config['arch'] = perturb_arch(c['arch'], 4)
        return new_config

    # An AsyncHyperBandScheduler used to be built here and was immediately
    # overwritten; only the PBT scheduler was ever used.
    sched = PopulationBasedTraining(
        time_attr='time_total_s',
        metric='mean_accuracy',
        mode='max',
        perturbation_interval=5.0,
        custom_explore_fn=explore_arch)

    analysis = tune.run(
        train_cnn,
        name="darts",
        scheduler=sched,
        stop={
            "mean_accuracy": 0.95,
            "training_iteration": 2 if args.smoke_test else 100
        },
        resources_per_trial={
            "cpu": 2,
            "gpu": 1  # int(args.cuda) * 0.5
        },
        num_samples=1 if args.smoke_test else 50,
        config={
            "args": args,
            "arch": tune.sample_from(lambda _: sample_arch(4)),
            # can use a flag to make this variable per tune worker later on
            "layers": args.layers
        })

    # tune.run received no metric/mode, so the mode is passed explicitly.
    print("Best config is:",
          analysis.get_best_config(metric="mean_accuracy", mode="max"))
def set_tuning_parameters(agent, config):
    """Build the PBT scheduler for ``agent`` and seed ``config`` with variation.

    Mutations and the explore function are only defined for ``"PPO"``
    (exact match); for any other agent the scheduler is created with an
    empty mutation dict and no explore function.

    Args:
        agent: Agent name.
        config: Trial config dict; mutated in place for PPO.

    Returns:
        Tuple ``(config, scheduler)``.
    """
    hype_params = {}
    explore = None
    if agent == "PPO":
        # optimization related parameters
        hype_params["lr"] = [1e-2, 1e-3, 1e-4, 1e-5]
        hype_params["train_batch_size"] = [1000, 2000, 4000]
        hype_params["sgd_minibatch_size"] = [16, 32, 64, 128]
        hype_params["num_sgd_iter"] = lambda: random.randint(1, 30)
        # GAE param. This must be a callable so each resample draws a new
        # value; it used to be a single float drawn once, when this
        # function ran.
        hype_params["lambda"] = random.random
        # initial coeff of KL term
        hype_params["kl_coeff"] = lambda: random.uniform(.1, .8)
        # size of clipping in PPO term
        hype_params["clip_param"] = lambda: random.uniform(.1, .8)
        # entropy coeff
        hype_params["entropy_coeff"] = lambda: random.uniform(0.0, 1.0)
        # .1 might be a bit high
        hype_params["kl_target"] = lambda: random.uniform(0.0, 0.05)
        explore = ppo_explore

        # just to give some variation at start
        config['train_batch_size'] = (
            lambda spec: random.choice([1000, 2000, 4000]))
        config['sgd_minibatch_size'] = (
            lambda spec: random.choice([16, 32, 64, 128]))

    scheduler = PopulationBasedTraining(
        time_attr='time_total_s',
        reward_attr='episode_reward_mean',  # this..will be pretty sparse
        perturbation_interval=5000,
        hyperparam_mutations=hype_params,
        resample_probability=0.25,
        custom_explore_fn=explore)
    return config, scheduler
def run(task, name=None):
    """Start Ray and run the ``pbt_test3`` experiment under PBT.

    Args:
        task: Stored under ``"task"`` in the trial config.
        name: Stored under ``"name"`` in the trial config.
    """
    ray.init()
    import random

    mutations = {
        # Allow for scaling-based perturbations, with a uniform backing
        # distribution for resampling.
        "actor_learning_rate": lambda: random.uniform(0.01, 1.0),
        "critic_learning_rate": lambda: random.uniform(0.01, 1.0),
        # Allow perturbations within this set of categorical values.
        "discounting": [0.8, 0.9, 0.95, 1.0],
    }
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        perturbation_interval=100,
        hyperparam_mutations=mutations)

    trial_config = {
        "task": task,
        "name": name,
        "actor_learning_rate": 0.1,
        "critic_learning_rate": 0.1,
        "discounting": 1.0,
    }
    experiment_spec = {
        "run": LogicRLTrainable,
        "stop": {"training_iteration": 8000},
        "num_samples": 6,
        "config": trial_config,
        "trial_resources": {"cpu": 2},
    }

    # Try to find the best factor 1 and factor 2
    run_experiments(
        {"pbt_test3": experiment_spec}, scheduler=pbt, verbose=False)
def testPermutationContinuation(self):
    """Check that runs continue correctly after a PBT permutation.

    Sometimes, runs were continued from deleted checkpoints. The
    deterministic initialisation used here would fail when the fix was not
    applied. See issues #9036, #9036
    """
    pbt_kwargs = dict(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        log_config=True,
        hyperparam_mutations={"c": lambda: 1},
    )
    scheduler = PopulationBasedTraining(**pbt_kwargs)

    param_a = MockParam([10, 20, 30, 40])
    param_b = MockParam([1.2, 0.9, 1.1, 0.8])

    # Seed both RNGs so the run is deterministic.
    random.seed(100)
    np.random.seed(1000)

    start_config = {
        "a": tune.sample_from(lambda _: param_a()),
        "b": tune.sample_from(lambda _: param_b()),
        "c": 1,
    }
    tune.run(
        MockTrainable,
        config=start_config,
        fail_fast=True,
        num_samples=4,
        checkpoint_freq=1,
        checkpoint_at_end=True,
        keep_checkpoints_num=1,
        checkpoint_score_attr="min-training_iteration",
        scheduler=scheduler,
        name="testPermutationContinuation",
        stop={"training_iteration": 3})