def get_raytune_schedule(raytune_cfg):
    """Build the Ray Tune trial scheduler named by ``raytune_cfg["sched"]``.

    Recognised names are ``"asha"``, ``"hyperband"``, ``"bohb"``/``"BOHB"``,
    ``"pbt"``/``"PBT"`` and ``"pb2"``/``"PB2"``. Every scheduler uses
    ``training_iteration`` as its time attribute and takes its metric and
    mode from ``raytune_cfg["default_metric"]`` / ``["default_mode"]``.

    :param raytune_cfg: Mapping with the scheduler name and one settings
        sub-mapping per scheduler family.
    :return: The configured scheduler, or ``None`` (after printing a notice)
        when the name is not one of the recognised values.
    """
    sched_name = raytune_cfg["sched"]
    recognised = ("asha", "hyperband", "bohb", "BOHB", "pbt", "PBT", "pb2", "PB2")
    if sched_name not in recognised:
        print("INFO: Not using any Ray Tune trial scheduler.")
        return None

    # Keyword arguments shared by every scheduler below.
    shared = dict(
        metric=raytune_cfg["default_metric"],
        mode=raytune_cfg["default_mode"],
        time_attr="training_iteration",
    )

    if sched_name == "asha":
        asha_cfg = raytune_cfg["asha"]
        return AsyncHyperBandScheduler(
            max_t=asha_cfg["max_t"],
            grace_period=asha_cfg["grace_period"],
            reduction_factor=asha_cfg["reduction_factor"],
            brackets=asha_cfg["brackets"],
            **shared,
        )

    if sched_name == "hyperband":
        return HyperBandScheduler(
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
            **shared,
        )

    if sched_name in ("bohb", "BOHB"):
        # requires pip install hpbandster ConfigSpace
        # BOHB takes its settings from the "hyperband" section.
        return HyperBandForBOHB(
            max_t=raytune_cfg["hyperband"]["max_t"],
            reduction_factor=raytune_cfg["hyperband"]["reduction_factor"],
            **shared,
        )

    if sched_name in ("pbt", "PBT"):
        return PopulationBasedTraining(
            perturbation_interval=raytune_cfg["pbt"]["perturbation_interval"],
            hyperparam_mutations=raytune_cfg["pbt"]["hyperparam_mutations"],
            log_config=True,
            **shared,
        )

    # Only "pb2"/"PB2" can reach this point.
    # requires pip install GPy sklearn
    return PB2(
        perturbation_interval=raytune_cfg["pb2"]["perturbation_interval"],
        hyperparam_bounds=raytune_cfg["pb2"]["hyperparam_bounds"],
        log_config=True,
        **shared,
    )
def create_pbt_scheduler(model):
    """Create a population-based training (PBT) scheduler.

    The scheduler maximises ``episode_reward_mean`` and perturbs every 10
    training iterations.

    :param model: Forwarded to ``create_hparam_tune_dict`` (called with
        ``is_config=False``) to obtain the hyperparameter mutation table.
    :return: A new PBT scheduler.
    """
    return PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=10,
        metric="episode_reward_mean",
        mode="max",
        hyperparam_mutations=create_hparam_tune_dict(model=model, is_config=False),
    )
def tuning(args):
    """Run a Ray Tune search named ``tune_cae`` with a PBT scheduler.

    The initial search space draws the ``l*_units`` sizes and ``lambda`` from
    fixed choice lists; PBT then perturbs the three unit counts every four
    training iterations. Trials minimise ``loss`` and stop after 10
    iterations. The best configuration is printed at the end.

    :param args: Namespace providing ``actv``, ``batch_size``, ``num_epochs``,
        ``num_gpus``, ``num_cpus`` and ``num_trials``.
    """
    if args.actv == 'prelu':
        activation = nn.PReLU
    else:
        activation = nn.SELU

    search_space = {
        "l1_units": tune.choice([480, 512, 544]),
        "l2_units": tune.choice([224, 256, 288]),
        "l3_units": tune.choice([96, 128, 160]),
        "lambda": tune.choice([1e-3, 1e-4, 1e-5]),
        # Single-element choice: the activation is fixed by ``args.actv``.
        "actv": tune.choice([activation]),
    }

    mutations = {
        "l1_units": [464, 496, 528, 560, 576],
        "l2_units": [208, 240, 272, 304, 328],
        "l3_units": [80, 112, 144, 176, 208],
    }
    pbt = PopulationBasedTraining(
        time_attr='training_iteration',
        perturbation_interval=4,
        hyperparam_mutations=mutations,
    )

    cli_reporter = CLIReporter(
        parameter_columns=["l1_units", "l2_units", "l3_units", "lambda"],
        metric_columns=["loss", "training_iteration"],
    )

    analysis = tune.run(
        tune.with_parameters(
            train,
            batch_size=args.batch_size,
            num_epochs=args.num_epochs,
            num_gpus=args.num_gpus,
        ),
        resources_per_trial={"cpu": args.num_cpus, "gpu": args.num_gpus},
        metric="loss",
        mode="min",
        config=search_space,
        num_samples=args.num_trials,
        scheduler=pbt,
        progress_reporter=cli_reporter,
        max_failures=3,
        stop={"training_iteration": 10},
        name="tune_cae",
    )
    print(f"Found best hyperparameters: {analysis.best_config}")
def testPermutationContinuationFunc(self):
    """Run PBT over a checkpointing function trainable for three iterations.

    Four samples are started with deterministic ``a``/``b`` values and fixed
    random seeds; only the best-scoring checkpoint (lowest
    ``training_iteration``) is kept per trial.
    """

    def MockTrainingFunc(config, checkpoint_dir=None):
        step = 0
        a, b = config["a"], config["b"]

        # Resume from a previous checkpoint when one is handed in.
        if checkpoint_dir:
            restore_path = os.path.join(checkpoint_dir, "model.mock")
            with open(restore_path, "rb") as fp:
                a, b, step = pickle.load(fp)

        while True:
            step += 1
            with tune.checkpoint_dir(step=step) as save_dir:
                save_path = os.path.join(save_dir, "model.mock")
                with open(save_path, "wb") as fp:
                    pickle.dump((a, b, step), fp)
            tune.report(mean_accuracy=(a - step) * b)

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        log_config=True,
        hyperparam_mutations={"c": lambda: 1},
    )

    param_a = MockParam([10, 20, 30, 40])
    param_b = MockParam([1.2, 0.9, 1.1, 0.8])

    random.seed(100)
    np.random.seed(1000)

    trial_config = {
        "a": tune.sample_from(lambda _: param_a()),
        "b": tune.sample_from(lambda _: param_b()),
        "c": 1,
    }
    tune.run(
        MockTrainingFunc,
        config=trial_config,
        fail_fast=True,
        num_samples=4,
        keep_checkpoints_num=1,
        checkpoint_score_attr="min-training_iteration",
        scheduler=pbt,
        name="testPermutationContinuationFunc",
        stop={"training_iteration": 3},
    )
def setup_tune_scheduler():
    """Assemble the keyword arguments for a PBT-driven Tune run.

    The mutation space, custom explore function, search space and metric
    settings all come from ``workload``; resources come from
    ``com.detect_baseline_resource()``.

    :return: Dict with ``scheduler``, ``config``, ``num_samples`` and
        ``resources_per_trial`` entries.
    """
    sample_space, explore_fn = workload.create_sample_space()
    search_space = workload.create_search_space()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=5,
        hyperparam_mutations=sample_space,
        custom_explore_fn=explore_fn,
        **workload.exp_metric(),
    )

    return {
        "scheduler": pbt,
        "config": search_space,
        # num_samples in PBT only sets population
        "num_samples": 10,
        "resources_per_trial": com.detect_baseline_resource(),
    }
def set_tuning_parameters(agent, config):
    """Attach initial samplers to ``config`` and build a PBT scheduler.

    Only the PPO agent (matched case-insensitively) is tuned; for any other
    agent ``config`` is returned untouched together with ``None``.

    :param agent: Agent name, e.g. ``"ppo"``, ``"ddpg"`` or ``"pg"``.
    :param config: Trainer configuration dict; mutated in place for PPO.
    :return: ``(config, scheduler)`` where ``scheduler`` is a
        ``PopulationBasedTraining`` instance for PPO and ``None`` otherwise.
    """
    scheduler = None

    if agent.lower() == "ppo":

        def explore(config):
            """Postprocess the perturbed config to ensure it's still valid."""
            # ensure we collect enough timesteps to do sgd
            if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
                config["train_batch_size"] = config["sgd_minibatch_size"] * 2
            # ensure we run at least one sgd iter
            if config["num_sgd_iter"] < 1:
                config["num_sgd_iter"] = 1
            return config

        # optimization related parameters
        # hype_params["kl_coeff"] = lambda: random.uniform(.1, .8)
        # hype_params["entropy_coeff"] = lambda: random.uniform(0.0, 1.0)
        # hype_params["kl_target"] = lambda: random.uniform(0.0, 0.05)
        hype_params = {
            "lambda": lambda: random.uniform(0.9, 1.0),
            "clip_param": lambda: random.uniform(0.01, 0.5),
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "num_sgd_iter": lambda: random.randint(1, 30),
            "sgd_minibatch_size": lambda: random.randint(128, 16384),
            "train_batch_size": lambda: random.randint(2000, 160000),
        }

        # Initial per-trial values. These must be the sampler objects
        # themselves: a trailing comma after the call would store a 1-tuple
        # in the config instead of a sampled value.
        config["num_sgd_iter"] = tune.sample_from(
            lambda spec: random.choice([10, 20, 30]))
        config["sgd_minibatch_size"] = tune.sample_from(
            lambda spec: random.choice([128, 512, 2048]))
        config["train_batch_size"] = tune.sample_from(
            lambda spec: random.choice([10000, 20000, 40000]))

        scheduler = PopulationBasedTraining(
            time_attr="time_total_s",
            reward_attr="episode_reward_mean",
            perturbation_interval=120,
            resample_probability=0.25,
            hyperparam_mutations=hype_params,
            custom_explore_fn=explore,
        )

    # "ddpg" and "pg" have no tuning set up: they fall through with
    # scheduler left as None.
    return config, scheduler
def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
    """Tune the Lightning MNIST classifier with population-based training.

    Data is downloaded into ``<system temp dir>/mnist_data_``; that directory
    is removed again when the run finishes, whether it succeeds or raises.

    :param num_samples: Number of trials passed to ``tune.run``.
    :param num_epochs: Forwarded to ``train_mnist_tune_checkpoint``.
    :param gpus_per_trial: GPUs reserved per trial and forwarded to the
        training function as ``num_gpus``.
    """
    data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")
    LightningMNISTClassifier.download_data(data_dir)

    try:
        config = {
            "layer_1_size": tune.choice([32, 64, 128]),
            "layer_2_size": tune.choice([64, 128, 256]),
            "lr": 1e-3,
            "batch_size": 64,
        }

        scheduler = PopulationBasedTraining(
            perturbation_interval=4,
            hyperparam_mutations={
                "lr": tune.loguniform(1e-4, 1e-1),
                "batch_size": [32, 64, 128],
            })

        reporter = CLIReporter(
            parameter_columns=[
                "layer_1_size", "layer_2_size", "lr", "batch_size"
            ],
            metric_columns=["loss", "mean_accuracy", "training_iteration"])

        analysis = tune.run(
            tune.with_parameters(
                train_mnist_tune_checkpoint,
                data_dir=data_dir,
                num_epochs=num_epochs,
                num_gpus=gpus_per_trial),
            resources_per_trial={
                "cpu": 1,
                "gpu": gpus_per_trial
            },
            metric="loss",
            mode="min",
            config=config,
            num_samples=num_samples,
            scheduler=scheduler,
            progress_reporter=reporter,
            name="tune_mnist_pbt")

        print("Best hyperparameters found were: ", analysis.best_config)
    finally:
        # Clean up the downloaded data even if the Tune run fails.
        shutil.rmtree(data_dir)
def tune_mnist_pbt(num_samples=10, num_epochs=10, gpus_per_trial=0):
    """Tune the Lightning MNIST classifier with population-based training.

    Data is downloaded into a fresh temporary directory, which is removed
    when the function exits, including when the download or the Tune run
    raises.

    :param num_samples: Number of trials passed to ``tune.run``.
    :param num_epochs: Forwarded to ``train_mnist_tune_checkpoint``.
    :param gpus_per_trial: GPUs reserved per trial and forwarded to the
        training function as ``num_gpus``.
    """
    data_dir = mkdtemp(prefix="mnist_data_")
    try:
        LightningMNISTClassifier.download_data(data_dir)

        config = {
            "layer_1_size": tune.choice([32, 64, 128]),
            "layer_2_size": tune.choice([64, 128, 256]),
            "lr": 1e-3,
            "batch_size": 64,
        }

        scheduler = PopulationBasedTraining(
            time_attr="training_iteration",
            metric="loss",
            mode="min",
            perturbation_interval=4,
            hyperparam_mutations={
                # Wrapped in a zero-argument callable that draws one value
                # from the log-uniform sampler per call.
                "lr": lambda: tune.loguniform(1e-4, 1e-1).func(None),
                "batch_size": [32, 64, 128],
            })

        reporter = CLIReporter(
            parameter_columns=[
                "layer_1_size", "layer_2_size", "lr", "batch_size"
            ],
            metric_columns=["loss", "mean_accuracy", "training_iteration"])

        tune.run(
            partial(
                train_mnist_tune_checkpoint,
                data_dir=data_dir,
                num_epochs=num_epochs,
                num_gpus=gpus_per_trial),
            resources_per_trial={
                "cpu": 1,
                "gpu": gpus_per_trial
            },
            config=config,
            num_samples=num_samples,
            scheduler=scheduler,
            progress_reporter=reporter,
            name="tune_mnist_pbt")
    finally:
        # mkdtemp never cleans up after itself; do it on every exit path.
        shutil.rmtree(data_dir)
def run_experiment(args):
    """Launch the ``darts`` Tune experiment under a PBT scheduler.

    :param args: Namespace providing ``smoke_test``, ``ray_address`` and
        ``layers``. With ``smoke_test`` set, ``layers`` is forced to 2 and the
        run is cut down to one sample and two iterations.
    """
    if args.smoke_test:
        args.layers = 2

    if args.ray_address:
        ray.init(address=args.ray_address)

    # PBT is the only scheduler used. There are no hyperparam_mutations;
    # exploration is done entirely by the custom function, which perturbs
    # the architecture and carries ``use_gpu`` over unchanged.
    sched = PopulationBasedTraining(
        time_attr='time_total_s',
        metric='mean_accuracy',
        mode='max',
        perturbation_interval=5.0,
        custom_explore_fn=lambda c: {
            'arch': perturb_arch(c['arch'], 4),
            'use_gpu': c['use_gpu']
        })

    analysis = tune.run(
        train_cnn,
        name="darts",
        scheduler=sched,
        stop={
            "mean_accuracy": 0.95,
            "training_iteration": 2 if args.smoke_test else 100
        },
        resources_per_trial={
            "cpu": 2,
            "gpu": 1  # int(args.cuda) * 0.5
        },
        num_samples=1 if args.smoke_test else 50,
        config={
            "args": args,
            "arch": tune.sample_from(lambda _: sample_arch(4)),
            # can use a flag to make this variable per tune worker later on
            "layers": args.layers
        })

    # State the direction explicitly so it matches the scheduler's mode.
    print("Best config is:",
          analysis.get_best_config(metric="mean_accuracy", mode="max"))
def run(task, name=None):
    """Start Ray and run the ``pbt_test3`` experiment under PBT.

    Six ``LogicRLTrainable`` samples are trained for up to 8000 iterations
    while PBT perturbs both learning rates and the discounting factor every
    100 iterations.

    :param task: Stored under ``"task"`` in each trial's config.
    :param name: Stored under ``"name"`` in each trial's config.
    """
    ray.init()
    import random

    def sample_learning_rate():
        return random.uniform(0.01, 1.0)

    mutations = {
        # Allow for scaling-based perturbations, with a uniform backing
        # distribution for resampling.
        "actor_learning_rate": lambda: sample_learning_rate(),
        "critic_learning_rate": lambda: sample_learning_rate(),
        # Allow perturbations within this set of categorical values.
        "discounting": [0.8, 0.9, 0.95, 1.0],
    }
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        perturbation_interval=100,
        hyperparam_mutations=mutations,
    )

    experiment_spec = {
        "run": LogicRLTrainable,
        "stop": {"training_iteration": 8000},
        "num_samples": 6,
        "config": {
            "task": task,
            "name": name,
            "actor_learning_rate": 0.1,
            "critic_learning_rate": 0.1,
            "discounting": 1.0,
        },
        "trial_resources": {"cpu": 2},
    }
    run_experiments({"pbt_test3": experiment_spec}, scheduler=pbt, verbose=False)
def set_tuning_parameters(agent, config):
    """Build the PBT mutation table and scheduler for ``agent``.

    Only ``agent == "PPO"`` has mutations defined; for it the initial
    ``train_batch_size`` and ``sgd_minibatch_size`` entries of ``config`` are
    also replaced by random-choice callables so trials start out varied.

    :param agent: Agent name; compared against ``"PPO"`` exactly.
    :param config: Trainer configuration dict; mutated in place for PPO.
    :return: ``(config, scheduler)``.
    """
    hype_params = {}
    explore = None

    if agent == "PPO":
        # optimization related parameters
        hype_params["lr"] = [
            float(1e-2), float(1e-3), float(1e-4), float(1e-5)
        ]
        hype_params["train_batch_size"] = [1000, 2000, 4000]
        hype_params["sgd_minibatch_size"] = [16, 32, 64, 128]
        hype_params["num_sgd_iter"] = lambda: random.randint(1, 30)
        # GAE param. Must be a callable like the other continuous entries:
        # a bare float cannot be called when PBT resamples the value.
        hype_params["lambda"] = random.random
        # initial coeff of KL term
        hype_params["kl_coeff"] = lambda: random.uniform(.1, .8)
        # size of clipping in PPO term
        hype_params["clip_param"] = lambda: random.uniform(.1, .8)
        # entropy coeff
        hype_params["entropy_coeff"] = lambda: random.uniform(0.0, 1.0)
        # .1 might be a bit high
        hype_params["kl_target"] = lambda: random.uniform(0.0, 0.05)
        explore = ppo_explore

        # just to give some variation at start
        config["train_batch_size"] = (
            lambda spec: random.choice([1000, 2000, 4000]))
        config["sgd_minibatch_size"] = (
            lambda spec: random.choice([16, 32, 64, 128]))

    scheduler = PopulationBasedTraining(
        time_attr='time_total_s',
        reward_attr='episode_reward_mean',  # this..will be pretty sparse
        perturbation_interval=5000,
        hyperparam_mutations=hype_params,
        resample_probability=0.25,
        custom_explore_fn=explore)

    return config, scheduler
def basicSetup(self,
               resample_prob=0.0,
               explore=None,
               perturbation_interval=10,
               log_config=False,
               hyperparams=None,
               hyperparam_mutations=None,
               step_once=True):
    """Create a PBT scheduler plus a mock runner holding five running trials.

    :param resample_prob: Passed as ``resample_probability``.
    :param explore: Passed as ``custom_explore_fn``.
    :param perturbation_interval: Passed through to the scheduler.
    :param log_config: Passed through to the scheduler.
    :param hyperparams: Config shared by every trial; when falsy each trial
        gets a default config whose ``id_factor`` is its index.
    :param hyperparam_mutations: Mutation table; a default one is used when
        falsy.
    :param step_once: When true, report one result (time 10, score
        ``50 * i``) per trial and assert the scheduler answers ``CONTINUE``.
    :return: ``(pbt, runner)`` with the scheduler's stats reset.
    """
    if not hyperparam_mutations:
        hyperparam_mutations = {
            "float_factor": lambda: 100.0,
            "int_factor": lambda: 10,
            "id_factor": [100],
        }

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=perturbation_interval,
        resample_probability=resample_prob,
        quantile_fraction=0.25,
        hyperparam_mutations=hyperparam_mutations,
        custom_explore_fn=explore,
        log_config=log_config)
    runner = _MockTrialRunner(pbt)

    for i in range(5):
        if hyperparams:
            trial_config = hyperparams
        else:
            trial_config = {
                "float_factor": 2.0,
                "const_factor": 3,
                "int_factor": 10,
                "id_factor": i,
            }
        trial = _MockTrial(i, trial_config)
        runner.add_trial(trial)
        trial.status = Trial.RUNNING
        if step_once:
            decision = pbt.on_trial_result(runner, trial, result(10, 50 * i))
            self.assertEqual(decision, TrialScheduler.CONTINUE)

    pbt.reset_stats()
    return pbt, runner
def testNoConfig(self):
    """Run PBT with no ``config`` passed to ``tune.run``.

    The mutation table mixes a Tune domain, a plain list and a nested dict
    holding a callable and another Tune domain.
    """
    nested_mutations = {
        "c1": lambda: np.random.uniform(0.5),
        "c2": tune.choice([2, 3, 4]),
    }
    mutations = {
        "a": tune.uniform(0, 0.3),
        "b": [1, 2, 3],
        "c": nested_mutations,
    }
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        hyperparam_mutations=mutations,
    )

    tune.run(
        MockTrainingFunc2,
        fail_fast=True,
        num_samples=4,
        scheduler=pbt,
        name="testNoConfig",
        stop={"training_iteration": 3},
    )
def testPermutationContinuation(self):
    """Check that runs continue correctly after a PBT permutation.

    Runs were sometimes continued from checkpoints that had already been
    deleted. The deterministic initialisation used here fails without the
    fix for that. See issue #9036.
    """
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        log_config=True,
        hyperparam_mutations={"c": lambda: 1},
    )

    param_a = MockParam([10, 20, 30, 40])
    param_b = MockParam([1.2, 0.9, 1.1, 0.8])

    random.seed(100)
    np.random.seed(1000)

    trial_config = {
        "a": tune.sample_from(lambda _: param_a()),
        "b": tune.sample_from(lambda _: param_b()),
        "c": 1,
    }
    tune.run(
        MockTrainable,
        config=trial_config,
        fail_fast=True,
        num_samples=4,
        checkpoint_freq=1,
        checkpoint_at_end=True,
        keep_checkpoints_num=1,
        checkpoint_score_attr="min-training_iteration",
        scheduler=pbt,
        name="testPermutationContinuation",
        stop={"training_iteration": 3},
    )
training_operator_cls=CifarTrainingOperator, initialization_hook=initialization_hook, num_workers=args.num_workers, config={ "test_mode": args.smoke_test, # whether to to subset the data BATCH_SIZE: 128 * args.num_workers, }, use_gpu=args.use_gpu, use_fp16=args.fp16) pbt_scheduler = PopulationBasedTraining( time_attr="training_iteration", metric="val_loss", mode="min", perturbation_interval=1, hyperparam_mutations={ # distribution for resampling "lr": lambda: np.random.uniform(0.001, 1), # allow perturbations within this set of categorical values "momentum": [0.8, 0.9, 0.99], }) reporter = CLIReporter() reporter.add_metric_column("val_loss", "loss") reporter.add_metric_column("val_accuracy", "acc") analysis = tune.run( TorchTrainable, num_samples=4, config={ "lr": tune.choice([0.001, 0.01, 0.1]),
def main(
    scenario,
    headless,
    time_total_s,
    rollout_fragment_length,
    train_batch_size,
    seed,
    num_samples,
    num_agents,
    num_workers,
    resume_training,
    result_dir,
    checkpoint_num,
    save_model_path,
):
    """Train the ``PG`` policy under PBT and export the best model.

    :param scenario: Path of the scenario to run; expanded and resolved.
    :param headless: Forwarded to the environment config.
    :param time_total_s: Stop criterion for each trial, in seconds.
    :param rollout_fragment_length: Capped at ``train_batch_size``.
    :param train_batch_size: Must be greater than 0.
    :param seed: Passed to ``smarts.core.seed``.
    :param num_samples: Number of trials.
    :param num_agents: Number of ``AGENT-<i>`` specs created.
    :param num_workers: Rollout worker count.
    :param resume_training: Forwarded as ``resume`` to ``tune.run``.
    :param result_dir: Directory for results and checkpoints.
    :param checkpoint_num: When truthy, restore from
        ``checkpoint_<n>/checkpoint-<n>`` under ``result_dir``.
    :param save_model_path: Destination the best trial's model is copied to.
    :raises AssertionError: If ``train_batch_size`` is not positive.
    """
    # The message must be a plain string: an int has no ``__name__``, so
    # interpolating ``train_batch_size.__name__`` raised AttributeError
    # instead of the intended AssertionError.
    assert train_batch_size > 0, "train_batch_size cannot be less than 1."

    if rollout_fragment_length > train_batch_size:
        rollout_fragment_length = train_batch_size

    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=300,
        resample_probability=0.25,
        # Specifies the mutations of these hyperparams
        # See: `ray.rllib.agents.trainer.COMMON_CONFIG` for common hyperparams
        hyperparam_mutations={
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "rollout_fragment_length": lambda: rollout_fragment_length,
            "train_batch_size": lambda: train_batch_size,
        },
        # Specifies additional mutations after hyperparam_mutations is applied
        custom_explore_fn=explore,
    )

    # XXX: There is a bug in Ray where we can only export a trained model if
    #      the policy it's attached to is named 'default_policy'.
    #      See: https://github.com/ray-project/ray/issues/5339
    rllib_policies = {
        "default_policy": (
            None,
            rllib_agent["observation_space"],
            rllib_agent["action_space"],
            {"model": {"custom_model": TrainingModel.NAME}},
        )
    }

    smarts.core.seed(seed)
    tune_config = {
        "env": RLlibHiWayEnv,
        "log_level": "WARN",
        "num_workers": num_workers,
        "env_config": {
            "seed": tune.sample_from(lambda spec: random.randint(0, 300)),
            "scenarios": [str(Path(scenario).expanduser().resolve().absolute())],
            "headless": headless,
            "agent_specs": {
                f"AGENT-{i}": rllib_agent["agent_spec"] for i in range(num_agents)
            },
        },
        "multiagent": {"policies": rllib_policies},
        "callbacks": Callbacks,
    }

    experiment_name = "rllib_example_multi"
    result_dir = Path(result_dir).expanduser().resolve().absolute()
    if checkpoint_num:
        checkpoint = str(
            result_dir / f"checkpoint_{checkpoint_num}" / f"checkpoint-{checkpoint_num}"
        )
    else:
        checkpoint = None

    print(f"Checkpointing at {str(result_dir)}")
    analysis = tune.run(
        "PG",
        name=experiment_name,
        stop={"time_total_s": time_total_s},
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=str(result_dir),
        resume=resume_training,
        restore=checkpoint,
        max_failures=3,
        num_samples=num_samples,
        export_formats=["model", "checkpoint"],
        config=tune_config,
        scheduler=pbt,
    )

    print(analysis.dataframe().head())

    best_logdir = Path(analysis.get_best_logdir("episode_reward_max", mode="max"))
    model_path = best_logdir / "model"

    copy_tree(str(model_path), save_model_path, overwrite=True)
    print(f"Wrote model to: {save_model_path}")
}, "stop": { "mean_accuracy": 0.80, "training_iteration": 30, }, "config": { "epochs": 1, "batch_size": 64, "lr": grid_search([10**-4, 10**-5]), "decay": lambda spec: spec.config.lr / 100.0, "dropout": grid_search([0.25, 0.5]), }, "num_samples": 4, } if args.smoke_test: train_spec["config"]["lr"] = 10**-4 train_spec["config"]["dropout"] = 0.5 ray.init() pbt = PopulationBasedTraining(time_attr="training_iteration", reward_attr="mean_accuracy", perturbation_interval=10, hyperparam_mutations={ "dropout": lambda _: np.random.uniform(0, 1), }) run_experiments({"pbt_cifar10": train_spec}, scheduler=pbt)
if __name__ == "__main__": # Hyper-Hyper parameters epochs_per_generation = 25 population_size = 10 num_generations = 4 hyperparam_mutations = dict() hyperparam_mutations["actor_lr"] = lambda: tune.loguniform(1e-5, 1e-1) hyperparam_mutations["critic_lr"] = lambda: tune.loguniform(1e-5, 1e-1) hyperparam_mutations["THRESH"] = lambda: tune.uniform(.01, .99) #hyperparam_mutations["copy_step"] = [10, 25, 50, 100] schedule = PopulationBasedTraining( time_attr='epoch', metric='avg_reward', mode='max', perturbation_interval=epochs_per_generation, hyperparam_mutations=hyperparam_mutations) ## If this code throws an error bytes has no readonly flag, comment out a line in cloudpickle_fast (see this discussion: https://github.com/ray-project/ray/issues/8262) tune.run( DDPG_Trainable, verbose=0, local_dir=BASE_DIR, config=dict( total_episodes=epochs_per_generation, n_epochs=epochs_per_generation, grid_size=16, THRESH=tune.uniform(.01, .99), noise_std_dev=.2, #hidden_unit_0 = tune.choice([8, 16, 32, 64, 128]),
parser = argparse.ArgumentParser() parser.add_argument("--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() if args.smoke_test: ray.init(num_cpus=2) # force pausing to happen for test else: ray.init() pbt = PopulationBasedTraining( time_attr="training_iteration", metric="mean_accuracy", mode="max", perturbation_interval=4, hyperparam_mutations={ # distribution for resampling "lr": lambda: random.uniform(0.0001, 0.02), # allow perturbations within this set of categorical values "some_other_factor": [1, 2], }) tune.run( pbt_function, name="pbt_test", scheduler=pbt, verbose=False, stop={ "training_iteration": 30, }, num_samples=8,
run=Params.training_alg, # must be the same as the default config config=config, stop=Params.stop_conditions, local_dir=Params.ray_results_dir, max_failures=9999, checkpoint_freq=Params.checkpoint_freq, checkpoint_at_end=True, loggers=loggers, ) # defining population scheduler pbt_scheduler = PopulationBasedTraining( time_attr='training_iteration', metric='episode_reward_mean', mode='max', perturbation_interval=Params.training_iteration // 100, # perturbate a total of N times during the training hyperparam_mutations={ # fixme: get correct params "lr": [1e-4], }) # run the experiment trials = run( exp, reuse_actors=False, verbose=Params.verbose, raise_on_failed_trial=True, # avoid agent not known error return_trials=True, # scheduler=pbt_scheduler, )
# ensure we collect enough timesteps to do sgd if config["train_batch_size"] < config["sgd_minibatch_size"] * 2: config["train_batch_size"] = config["sgd_minibatch_size"] * 2 # ensure we run at least one sgd iter if config["num_sgd_iter"] < 1: config["num_sgd_iter"] = 1 return config pbt = PopulationBasedTraining( time_attr="time_total_s", metric="episode_reward_mean", mode="max", perturbation_interval=120, resample_probability=0.25, # Specifies the mutations of these hyperparams hyperparam_mutations={ "lambda": lambda: random.uniform(0.9, 1.0), "clip_param": lambda: random.uniform(0.01, 0.5), "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], "num_sgd_iter": lambda: random.randint(1, 30), "sgd_minibatch_size": lambda: random.randint(128, 16384), "train_batch_size": lambda: random.randint(2000, 160000), }, custom_explore_fn=explore) ray.init() run( "PPO", name="pbt_humanoid_test", scheduler=pbt, num_samples=8, config={
config["num_sgd_iter"] = 1 # ensure that the size of the train batches exactly batches the sum of length of rollout fragments config["rollout_fragment_length"] = config["train_batch_size"] \ / (config["num_workers"] * config["num_envs_per_worker"]) return config pbt = PopulationBasedTraining( time_attr="timesteps_total", #time_total_s metric="episode_reward_mean", mode="max", perturbation_interval=400000, resample_probability=0.25, quantile_fraction=0.25, # Specifies the mutations of these hyperparams hyperparam_mutations={ "lambda": lambda: random.uniform(0.9, 1.0), "clip_param": lambda: random.uniform(0.01, 0.5), "lr": [1.0e-3, 3.0e-4, 1.0e-4, 3.0e-5, 1.0e-5], "num_sgd_iter": lambda: random.randint(1, 32), "sgd_minibatch_size": lambda: random.randint(64, 1024), "train_batch_size": lambda: random.randint(256, 4096), }, custom_explore_fn=explore) tune.run( AGENT_ALGORITHM, name="_".join([GYM_ENV_NAME, "PBT", AGENT_ALGORITHM]), scheduler=pbt, num_samples=8, reuse_actors=False,
def main(args):
    """Run a Tune hyperparameter search and save the best configuration.

    ``args.srch_algo`` selects the search backend: ``"hyperopt"`` (paired
    with PBT when ``args.pbt`` is set, otherwise ASHA) or ``"bohb"`` (paired
    with ``HyperBandForBOHB``). Trials maximise ``score``. The best trial's
    config and its ``r1``/``map`` results are written to
    ``best_config.yaml`` under ``cfg.OUTPUT_DIR``.

    :param args: Namespace providing ``srch_algo``, ``pbt`` and
        ``num_trials``; also passed to ``setup``.
    :raises ValueError: If ``args.srch_algo`` is neither ``"hyperopt"`` nor
        ``"bohb"``.
    """
    cfg = setup(args)

    if args.srch_algo not in ("hyperopt", "bohb"):
        raise ValueError(
            "Search algorithm must be chosen from [hyperopt, bohb], but got {}"
            .format(args.srch_algo))

    exp_metrics = dict(metric="score", mode="max")

    # Only batch size and instance count are searched. The other candidates
    # (lr, delay_epochs, wd, wd_bias, ce_scale, circle_scale, circle_margin,
    # autoaug_enabled, cj_enabled) are left out for now.
    if args.srch_algo == "hyperopt":
        # Create a HyperOpt search space
        search_space = {
            "bsz": hp.choice("bsz", [64, 96, 128, 160, 224, 256]),
            "num_inst": hp.choice("num_inst", [2, 4, 8, 16, 32]),
        }
        # Values are indices into the hp.choice lists above.
        starting_points = [{
            "bsz": 0,
            "num_inst": 3,
        }]
        search_algo = HyperOptSearch(
            search_space,
            points_to_evaluate=starting_points,
            **exp_metrics)

        if args.pbt:
            scheduler = PopulationBasedTraining(
                time_attr="training_iteration",
                **exp_metrics,
                perturbation_interval=2,
                hyperparam_mutations={
                    "bsz": [64, 96, 128, 160, 224, 256],
                    "num_inst": [2, 4, 8, 16, 32],
                })
        else:
            scheduler = ASHAScheduler(
                grace_period=2,
                reduction_factor=3,
                max_t=7,
                **exp_metrics)
    else:
        # "bohb"
        search_space = CS.ConfigurationSpace()
        search_space.add_hyperparameters([
            CS.CategoricalHyperparameter(
                name="bsz", choices=[64, 96, 128, 160, 224, 256]),
            CS.CategoricalHyperparameter(
                name="num_inst", choices=[2, 4, 8, 16, 32]),
        ])
        search_algo = TuneBOHB(search_space, max_concurrent=4, **exp_metrics)
        scheduler = HyperBandForBOHB(
            time_attr="training_iteration",
            reduction_factor=3,
            max_t=7,
            **exp_metrics,
        )

    reporter = CLIReporter(
        parameter_columns=["bsz", "num_inst"],
        metric_columns=["r1", "map", "training_iteration"])

    trainable = partial(train_tuner, cfg=cfg)
    analysis = tune.run(
        trainable,
        resources_per_trial={"cpu": 4, "gpu": 1},
        search_alg=search_algo,
        num_samples=args.num_trials,
        scheduler=scheduler,
        progress_reporter=reporter,
        local_dir=cfg.OUTPUT_DIR,
        keep_checkpoints_num=10,
        name=args.srch_algo)

    best_trial = analysis.get_best_trial("score", "max", "last")
    final_result = best_trial.last_result
    logger.info("Best trial config: {}".format(best_trial.config))
    logger.info("Best trial final validation mAP: {}, Rank-1: {}".format(
        final_result["map"], final_result["r1"]))

    save_dict = {
        "R1": final_result["r1"].item(),
        "mAP": final_result["map"].item(),
    }
    save_dict.update(best_trial.config)

    path = os.path.join(cfg.OUTPUT_DIR, "best_config.yaml")
    with PathManager.open(path, "w") as f:
        f.write(CfgNode(save_dict).dump())
    logger.info("Best config saved to {}".format(os.path.abspath(path)))
def training_team(params):
    """Train ``policy_0`` with ``PPOTrainer`` under a PBT scheduler.

    PBT maximises ``policy_reward_mean/policy_0`` and mutates only ``lr``;
    ``limit_gamma_explore`` post-processes each perturbed config. Trials
    stop after 1e9 total timesteps.

    :param params: Mapping with the scheduler settings (``time_attr``,
        ``perturbation_interval``), the ``tune.run`` settings (``restore``,
        ``resume``, ``name``, ``queue_trials``, ``num_samples``,
        ``checkpoint_freq``) and the trainer hyperparameters read below.
        It is also passed to ``initialize``.
    """
    env_config, policies = initialize(params)

    # PBT setting
    pbt = PopulationBasedTraining(
        time_attr=params["time_attr"],
        metric="policy_reward_mean/policy_0",
        mode="max",
        perturbation_interval=params["perturbation_interval"],
        custom_explore_fn=limit_gamma_explore,
        hyperparam_mutations={
            "lr": lambda: random.uniform(0.0001, 0.1),
            # "gamma": lambda: random.uniform(0.85, 0.999)
        })

    stop_criteria = {
        # "training_iteration": params["training_iteration"],
        "timesteps_total": 1000000000
    }
    multiagent_config = {
        "policies": policies,
        "policy_mapping_fn": policy_mapping,
        "policies_to_train": ["policy_0"],
    }

    trials = tune.run(
        PPOTrainer,
        restore=params["restore"],
        resume=params["resume"],
        name=params["name"],
        queue_trials=params["queue_trials"],
        scheduler=pbt,
        num_samples=params["num_samples"],
        stop=stop_criteria,
        checkpoint_freq=params["checkpoint_freq"],
        checkpoint_at_end=True,
        verbose=1,
        config={
            "gamma": params["gamma"],
            "lr": params["lr"],
            "entropy_coeff": params["entropy_coeff"],
            "kl_coeff": params["kl_coeff"],  # disable KL
            "batch_mode": ("complete_episodes"
                           if params["complete_episodes"]
                           else "truncate_episodes"),
            "rollout_fragment_length": params["rollout_fragment_length"],
            "env": "PommeMultiAgent-v1",
            "env_config": env_config,
            "num_workers": params["num_workers"],
            "num_envs_per_worker": params["num_envs_per_worker"],
            "num_gpus_per_worker": params["num_gpus_per_worker"],
            "num_gpus": params["num_gpus"],
            "train_batch_size": params["train_batch_size"],
            "sgd_minibatch_size": params["sgd_minibatch_size"],
            "clip_param": params["clip_param"],
            "lambda": params["lambda"],
            "num_sgd_iter": params["num_sgd_iter"],
            "vf_share_layers": True,
            "vf_loss_coeff": params["vf_loss_coeff"],
            "vf_clip_param": params["vf_clip_param"],
            "callbacks": PommeCallbacks,
            "multiagent": multiagent_config,
            "observation_filter": "MeanStdFilter",  # should use MeanStdFilter
            "evaluation_num_episodes": params["evaluation_num_episodes"],
            "evaluation_interval": params["evaluation_interval"],
            "log_level": "WARN",
            "use_pytorch": True
        })
"approx": sample_from( lambda _: random.choice(sample_blocks(32, args.approx))), "swap_period": 100, "baseline": args.baseline, "restore_path": restore_path, }, "num_samples": 10 } ray.init() pbt = PopulationBasedTraining(time_attr="training_iteration", reward_attr="performance", perturbation_interval=10, hyperparam_mutations={ "lr": lambda: random.uniform(.0001, 1), "mom": lambda: random.uniform(.5, 1), }) run(Cifar100Model, name=args.test_name, local_dir=args.dir, scheduler=pbt, resume=False, reuse_actors=True, **train_spec)
if config["train_batch_size"] < config["sgd_minibatch_size"] * 2: config["train_batch_size"] = config["sgd_minibatch_size"] * 2 # ensure we run at least one sgd iter if config["num_sgd_iter"] < 1: config["num_sgd_iter"] = 1 return config pbt = PopulationBasedTraining( time_attr="time_total_s", reward_attr="episode_reward_mean", perturbation_interval=120, resample_probability=0.25, # Specifies the mutations of these hyperparams hyperparam_mutations={ # "lambda": lambda: random.uniform(0.9, 1.0), # "clip_param": lambda: random.uniform(0.01, 0.5), "lr": [1e-2, 5e-3, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5, 5e-6], "gamma": [0.997,0.995,0.99,0.98,0.97,0.95,0.9,0.85,0.8], "entropy_coeff": [0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01, 0.0], # "num_sgd_iter": lambda: random.randint(1, 30), # "sgd_minibatch_size": lambda: random.randint(128, 16384), # "train_batch_size": lambda: random.randint(2000, 160000), }, custom_explore_fn=explore) ray.init() run( "PPO", name="pbt_halfcheetah", scheduler=pbt, **{
def tune_transformer(num_samples=8, gpus_per_trial=0, smoke_test=False):
    """Fine-tune a sequence classifier on the ``rte`` task under PBT.

    Downloads and caches the tokenizer, model and task data, builds a
    ``Trainer`` and hands the search to ``Trainer.hyperparameter_search``
    with the Ray backend. PBT maximises ``eval_acc`` and perturbs weight
    decay, learning rate and the per-device train batch size after every
    training iteration.

    :param num_samples: Number of trials (``n_trials``).
    :param gpus_per_trial: GPUs per trial; CUDA is disabled when this is
        not positive.
    :param smoke_test: Use a tiny model, a separate data directory, a single
        training step and stop after one iteration.
    """
    data_dir_name = "./test_data" if smoke_test else "./data"
    data_dir = os.path.abspath(os.path.join(os.getcwd(), data_dir_name))
    if not os.path.exists(data_dir):
        os.mkdir(data_dir, 0o755)

    # Change these as needed.
    if smoke_test:
        model_name = "sshleifer/tiny-distilroberta-base"
    else:
        model_name = "bert-base-uncased"
    task_name = "rte"

    task_data_dir = os.path.join(data_dir, task_name.upper())

    num_labels = glue_tasks_num_labels[task_name]

    config = AutoConfig.from_pretrained(
        model_name, num_labels=num_labels, finetuning_task=task_name)

    # Download and cache tokenizer, model, and features
    print("Downloading and caching Tokenizer")
    # Triggers tokenizer download to cache
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    print("Downloading and caching pre-trained model")
    AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=config,
    )

    def get_model():
        return AutoModelForSequenceClassification.from_pretrained(
            model_name,
            config=config,
        )

    # Download data.
    download_data(task_name, data_dir)

    data_args = GlueDataTrainingArguments(
        task_name=task_name, data_dir=task_data_dir)

    train_dataset = GlueDataset(
        data_args, tokenizer=tokenizer, mode="train", cache_dir=task_data_dir)
    eval_dataset = GlueDataset(
        data_args, tokenizer=tokenizer, mode="dev", cache_dir=task_data_dir)

    # Arguments tagged "config" are ones the original marks as coming from
    # the per-trial config.
    training_args = TrainingArguments(
        output_dir=".",
        learning_rate=1e-5,  # config
        do_train=True,
        do_eval=True,
        no_cuda=gpus_per_trial <= 0,
        evaluation_strategy="epoch",
        load_best_model_at_end=True,
        num_train_epochs=2,  # config
        max_steps=-1,
        per_device_train_batch_size=16,  # config
        per_device_eval_batch_size=16,  # config
        warmup_steps=0,
        weight_decay=0.1,  # config
        logging_dir="./logs",
        skip_memory_metrics=True,
        report_to="none")

    trainer = Trainer(
        model_init=get_model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=build_compute_metrics_fn(task_name))

    tune_config = {
        "per_device_train_batch_size": 32,
        "per_device_eval_batch_size": 32,
        "num_train_epochs": tune.choice([2, 3, 4, 5]),
        "max_steps": 1 if smoke_test else -1,  # Used for smoke test.
    }

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="eval_acc",
        mode="max",
        perturbation_interval=1,
        hyperparam_mutations={
            "weight_decay": tune.uniform(0.0, 0.3),
            "learning_rate": tune.uniform(1e-5, 5e-5),
            "per_device_train_batch_size": [16, 32, 64],
        })

    # Maps each parameter name to the shorter column header shown in the CLI.
    column_labels = {
        "weight_decay": "w_decay",
        "learning_rate": "lr",
        "per_device_train_batch_size": "train_bs/gpu",
        "num_train_epochs": "num_epochs"
    }
    reporter = CLIReporter(
        parameter_columns=column_labels,
        metric_columns=[
            "eval_acc", "eval_loss", "epoch", "training_iteration"
        ])

    stop_criteria = {"training_iteration": 1} if smoke_test else None
    trainer.hyperparameter_search(
        hp_space=lambda _: tune_config,
        backend="ray",
        n_trials=num_samples,
        resources_per_trial={
            "cpu": 1,
            "gpu": gpus_per_trial
        },
        scheduler=pbt,
        keep_checkpoints_num=1,
        checkpoint_score_attr="training_iteration",
        stop=stop_criteria,
        progress_reporter=reporter,
        local_dir="~/ray_results/",
        name="tune_transformer_pbt",
        log_to_file=True)
"mean_accuracy": 0.90, "training_iteration": 50, }, "config": { "epochs": 1, "batch_size": 64*6, "lr": grid_search([10**-3, 10**-4]), "decay": sample_from(lambda spec: spec.config.lr / 10.0), "depth": grid_search([20,32,44,50]), }, "local_dir": args.output_dir, "num_samples": 8, # "checkpoint_freq":1, } if args.smoke_test: train_spec["config"]["lr"] = 10**-4 train_spec["config"]["depth"] = 20 ray.init(redis_address=args.redis_address, log_to_driver=False) pbt = PopulationBasedTraining( time_attr="training_iteration", reward_attr="mean_accuracy", perturbation_interval=10, hyperparam_mutations={ "decay": [10**-4, 10**-5, 10**-6], }) run_experiments({"pbt_cifar10": train_spec}, scheduler=pbt)
def searchBestHypers(num_samples=10,
                     max_num_epochs=15,
                     n_epochs_stop=2,
                     grace_period=5,
                     gpus_per_trial=0,
                     data_obj=None):
    """Run a Ray Tune search over the DilatedNet hyperparameters.

    The search samples ``num_samples`` configurations, trains each one with
    ``train_cgm`` under an ASHA scheduler, reloads the checkpoint of the trial
    with the lowest final ``loss``, evaluates it with ``test_rmse`` and writes
    a JSON summary to ``code_path / 'hyper_experiments'``.

    Args:
        num_samples: Number of configurations sampled by ``tune.run``.
        max_num_epochs: Upper bound on training iterations per trial; used as
            ``max_t`` for ASHA and forwarded to ``train_cgm`` as
            ``max_epochs``.
        n_epochs_stop: Forwarded unchanged to ``train_cgm``.
        grace_period: Forwarded unchanged to ``train_cgm``. Note that the ASHA
            scheduler itself uses a fixed grace period of 1.
        gpus_per_trial: GPUs reserved per trial. When more than one is
            requested and CUDA is available, the rebuilt best model is wrapped
            in ``nn.DataParallel``.
        data_obj: Data container handed to ``train_cgm`` and ``test_rmse``.
            Required, despite the ``None`` default.

    Returns:
        The experiment id (``'id_<timestamp>_<user>'``); the summary is saved
        as ``<experiment id>.json``.

    Raises:
        AssertionError: If ``data_obj`` is ``None``.
    """
    assert data_obj is not None, "data_obj is required"

    search_space = {
        "batch_size": tune.choice([4, 8, 16, 32]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "h1": tune.sample_from(lambda: 2**np.random.randint(2, 7)),  # conv 1
        "h2": tune.sample_from(lambda: 2**np.random.randint(2, 7)),  # conv 2
        "h3": tune.sample_from(lambda: 2**np.random.randint(2, 7)),  # conv 3
        "h4": tune.sample_from(lambda: 2**np.random.randint(0, 4)),  # LSTM hidden
        "h5": tune.sample_from(lambda: 2**np.random.randint(3, 8)),  # linear output
        "h6": tune.sample_from(lambda: np.random.randint(1, 3)),
        "wd": tune.loguniform(1e-4, 1e-1),
    }

    # ASHA is the only scheduler handed to tune.run. A PopulationBasedTraining
    # instance used to be built here as well but was never used, so it is gone.
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2,
    )
    reporter = CLIReporter(metric_columns=["loss", "training_iteration"])

    trainable = partial(
        train_cgm,
        data_obj=data_obj,
        n_epochs_stop=n_epochs_stop,
        max_epochs=max_num_epochs,
        grace_period=grace_period,
    )
    result = tune.run(
        trainable,
        resources_per_trial={"cpu": 1, "gpu": gpus_per_trial},
        config=search_space,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    best_config = best_trial.config
    print(f"Best trial config: {best_config}")
    print(f"Best trial final validation loss: {best_trial.last_result['loss']}")

    # Rebuild the network with the winning architecture parameters.
    best_trained_model = DilatedNet(
        h1=best_config["h1"],
        h2=best_config["h2"],
        h3=best_config["h3"],
        h4=best_config["h4"],
        h5=best_config["h5"],
        h6=best_config["h6"],
    )
    device = "cpu"
    if torch.cuda.is_available():
        device = "cuda:0"
        if gpus_per_trial > 1:
            best_trained_model = nn.DataParallel(best_trained_model)
    best_trained_model.to(device)

    best_checkpoint_dir = best_trial.checkpoint.value
    print("BEST MODEL DIR: ", best_checkpoint_dir)
    # map_location lets a checkpoint written on a GPU worker be restored on a
    # CPU-only driver. The optimizer state is not needed for evaluation.
    model_state, _ = torch.load(
        os.path.join(best_checkpoint_dir, "checkpoint"),
        map_location=device,
    )
    best_trained_model.load_state_dict(model_state)

    # Called only for its side effect (per the original author: fitting the
    # scaler on data_obj); the returned datasets are not used.
    # TODO: find a cleaner way to do this.
    data_obj.load_train_and_val()
    test_rmse_val = test_rmse(best_trained_model, data_obj)
    print(f"Best trial test set rmse: {test_rmse_val}")

    # Build the id BEFORE the record so the stored 'name' matches the file
    # name; previously the record always contained the placeholder
    # 'no_name_yet'.
    current_time = datetime.datetime.now().strftime('%Y-%m-%d_%H%M%S')
    user = getpass.getuser()
    experiment_id = f'id_{current_time}_{user}'

    # Save the results
    experiment = {
        'name': str(experiment_id),
        'best_trial_dir': str(best_checkpoint_dir),
        'train_data': str(data_obj.train_data),
        'test_data': str(data_obj.test_data),
        'start_date_train': str(data_obj.start_date_train),
        'start_date_test': str(data_obj.start_date_test),
        'end_date_train': str(data_obj.end_date_train),
        'end_date_test': str(data_obj.end_date_test),
    }

    experiment_path = code_path / 'hyper_experiments'
    experiment_path.mkdir(exist_ok=True, parents=True)
    with open(experiment_path / (experiment_id + '.json'), 'w') as outfile:
        json.dump(experiment, outfile, indent=4)

    return experiment_id
#model.evaluate(x_test, y_test) configuration = tune.Experiment( "pbt_tune_cifar10", run=train_cifar_tune, resources_per_trial={"cpu":8, "gpu":1}, stop={'mean_accuracy':0.99}, config={ #'block': [ tune.sample_from(lambda spec: np.random.randint(2, high=25)), tune.sample_from(lambda spec: np.random.randint(2, high=25)), tune.sample_from(lambda spec: np.random.randint(2, high=25)), tune.sample_from(lambda spec: np.random.randint(2, high=25))] 'bloack': [6,6,6,6] } ) pbt = PopulationBasedTraining(time_attr="training_iteration", reward_attr="mean_accuracy", hyperparam_mutations={ 'block': [ tune.sample_from(lambda spec: np.random.randint(2, high=25)), tune.sample_from(lambda spec: np.random.randint(2, high=25)), tune.sample_from(lambda spec: np.random.randint(2, high=25)), tune.sample_from(lambda spec: np.random.randint(2, high=25))] } ) trials=tune.run_experiments(configuration, scheduler=pbt) print(trials) exit(1) sched = AsyncHyperBandScheduler( time_attr="timesteps_total", reward_attr="mean_accuracy", max_t=400, grace_period=20)