def basicSetup(self, resample_prob=0.0, explore=None):
    """Create a PBT scheduler and a mock runner pre-loaded with 5 running trials.

    Each trial reports one result (so the scheduler records an initial
    score per trial), after which the scheduler's perturbation stats are
    reset before the pair is returned.
    """
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=10,
        resample_probability=resample_prob,
        hyperparam_mutations={
            "id_factor": [100],
            "float_factor": lambda: 100.0,
            "int_factor": lambda: 10,
        },
        custom_explore_fn=explore)
    mock_runner = _MockTrialRunner(scheduler)
    for idx in range(5):
        trial_config = {
            "id_factor": idx,
            "float_factor": 2.0,
            "const_factor": 3,
            "int_factor": 10,
        }
        mock_trial = _MockTrial(idx, trial_config)
        mock_runner.add_trial(mock_trial)
        mock_trial.status = Trial.RUNNING
        # First result at iteration 10 with score 50 * idx should never
        # trigger a perturbation decision other than CONTINUE.
        decision = scheduler.on_trial_result(
            mock_runner, mock_trial, result(10, 50 * idx))
        self.assertEqual(decision, TrialScheduler.CONTINUE)
    scheduler.reset_stats()
    return scheduler, mock_runner
def schedulerSetup(self, num_trials):
    """Return a default PBT scheduler and a mock runner with launched trials.

    Args:
        num_trials: how many mock trials to launch on the runner; each gets
            an identical config and a distinct experiment tag ("0", "1", ...).
    """
    scheduler = PopulationBasedTraining()
    mock_runner = _MockTrialRunnerPBT()
    for index in range(num_trials):
        trial = _MockTrialPBT("__parameter_tuning")
        trial.config = {"test": 1, "test1": 1, "env": "test"}
        trial.experiment_tag = str(index)
        mock_runner._launch_trial(trial)
    return scheduler, mock_runner
def basicSetup(self, resample_prob=0.0, explore=None):
    """Build a PBT scheduler and a mock runner seeded with five RUNNING trials.

    Every trial submits a single result so the scheduler observes an
    initial score; stats are cleared afterwards so tests start from a
    clean slate.
    """
    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        perturbation_interval=10,
        resample_probability=resample_prob,
        hyperparam_mutations={
            "id_factor": [100],
            "float_factor": lambda: 100.0,
            "int_factor": lambda: 10,
        },
        custom_explore_fn=explore)
    runner = _MockTrialRunner(pbt)
    n = 0
    while n < 5:
        trial = _MockTrial(n, {
            "id_factor": n,
            "float_factor": 2.0,
            "const_factor": 3,
            "int_factor": 10,
        })
        runner.add_trial(trial)
        trial.status = Trial.RUNNING
        # The initial report (iteration 10, score 50 * n) must yield CONTINUE.
        self.assertEqual(
            pbt.on_trial_result(runner, trial, result(10, 50 * n)),
            TrialScheduler.CONTINUE)
        n += 1
    pbt.reset_stats()
    return pbt, runner
# ensure we collect enough timesteps to do sgd if config["timesteps_per_batch"] < config["sgd_batchsize"] * 2: config["timesteps_per_batch"] = config["sgd_batchsize"] * 2 # ensure we run at least one sgd iter if config["num_sgd_iter"] < 1: config["num_sgd_iter"] = 1 return config pbt = PopulationBasedTraining( time_attr="time_total_s", reward_attr="episode_reward_mean", perturbation_interval=120, resample_probability=0.25, # Specifies the mutations of these hyperparams hyperparam_mutations={ "lambda": lambda: random.uniform(0.9, 1.0), "clip_param": lambda: random.uniform(0.01, 0.5), "sgd_stepsize": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], "num_sgd_iter": lambda: random.randint(1, 30), "sgd_batchsize": lambda: random.randint(128, 16384), "timesteps_per_batch": lambda: random.randint(2000, 160000), }, custom_explore_fn=explore) ray.init() run_experiments( { "pbt_humanoid_test": { "run": "PPO", "env": "Humanoid-v1", "repeat": 8,
}, "stop": { "mean_accuracy": 0.80, "timesteps_total": 300, }, "config": { "epochs": 1, "batch_size": 64, "lr": grid_search([10**-4, 10**-5]), "decay": lambda spec: spec.config.lr / 100.0, "dropout": grid_search([0.25, 0.5]), }, "repeat": 4, } if args.smoke_test: train_spec["config"]["lr"] = 10**-4 train_spec["config"]["dropout"] = 0.5 ray.init() pbt = PopulationBasedTraining(time_attr="timesteps_total", reward_attr="mean_accuracy", perturbation_interval=10, hyperparam_mutations={ "dropout": lambda _: np.random.uniform(0, 1), }) run_experiments({"pbt_cifar10": train_spec}, scheduler=pbt)
register_trainable("my_class", MyTrainableClass) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--smoke-test", action="store_true", help="Finish quickly for testing") args, _ = parser.parse_known_args() ray.init() pbt = PopulationBasedTraining( time_attr="training_iteration", reward_attr="episode_reward_mean", perturbation_interval=10, hyperparam_mutations={ # Allow for scaling-based perturbations, with a uniform backing # distribution for resampling. "factor_1": lambda: random.uniform(0.0, 20.0), # Allow perturbations within this set of categorical values. "factor_2": [1, 2], }) # Try to find the best factor 1 and factor 2 run_experiments( { "pbt_test": { "run": "my_class", "stop": { "training_iteration": 2 if args.smoke_test else 99999 }, "repeat": 10,
if config["num_sgd_iter"] < 1: config["num_sgd_iter"] = 1 return config pbt = PopulationBasedTraining( time_attr="training_iteration", reward_attr="episode_reward_mean", perturbation_interval=75, resample_probability=0.25, #0.25, # Specifies the mutations of these hyperparams hyperparam_mutations={ "lambda": lambda: random.uniform(0.9, 1.0), "gamma": lambda: random.uniform(0.99, 0.999), "horizon": lambda: random.randint(256, 2048), "clip_param": lambda: random.uniform(0.01, 0.4), "sgd_stepsize": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5], "num_sgd_iter": lambda: random.randint(2, 10), #"sgd_batchsize": lambda: random.randint(128, 16384), "timesteps_per_batch": lambda: random.randint(16, 256), "vf_loss_coeff": lambda: random.uniform(0.3, 1), "entropy_coeff": lambda: random.uniform(0.0, 0.01), #"kl_coeff": [0.0,0.2,1.0], "kl_target": lambda: random.uniform(0.003, 0.03) } #,custom_explore_fn=explore ) ray.init() run_experiments( {