def variant_equals(*keys):
    def get_from_spec(spec):
        # TODO(hartikainen): This may break in some cases. ray.tune seems to
        # add a 'config' key at the top of the spec, whereas `generate_variants`
        # does not.
        node = spec.get('config', spec)
        for key in keys:
            node = node[key]
        return node

    return sample_from(get_from_spec)
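# A minimal usage sketch (not from any of the snippets here), assuming the
# `variant_equals` helper above is in scope. It illustrates the case the TODO
# comment describes: when `generate_variants` is called directly, the spec
# passed to the `sample_from` callable has no top-level 'config' key, so
# `spec.get('config', spec)` falls back to the spec itself.
# NOTE: the import path is version-dependent; older Ray releases expose
# `generate_variants` under `ray.tune.suggest.variant_generator`, newer ones
# under `ray.tune.search.variant_generator`.
from ray.tune.suggest.variant_generator import generate_variants

space = {
    'policy_params': {'hidden_layer_sizes': (256, 256)},
    'Q_params': {
        # Reuse the policy's layer sizes for the Q-function.
        'hidden_layer_sizes': variant_equals('policy_params', 'hidden_layer_sizes'),
    },
}

for _, variant in generate_variants(space):
    assert variant['Q_params']['hidden_layer_sizes'] == (256, 256)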
def testDependentLambda(self):
    trials = self.generate_trials({
        "run": "PPO",
        "config": {
            "x": grid_search([1, 2]),
            "y": tune.sample_from(lambda spec: spec.config.x * 100),
        },
    }, "dependent_lambda")
    trials = list(trials)
    self.assertEqual(len(trials), 2)
    self.assertEqual(trials[0].config, {"x": 1, "y": 100})
    self.assertEqual(trials[1].config, {"x": 2, "y": 200})
def choice_n(categories, min_items, max_items):
    """Sample a subset from a list.

    :param categories: A list to be sampled
    :param min_items: minimum number of items to be sampled
    :param max_items: maximum number of items to be sampled
    """
    return tune.sample_from(lambda spec: list(
        np.random.choice(categories,
                         size=np.random.randint(low=min_items, high=max_items),
                         replace=False)))
def config_word_emb_w2v_custom(config_test_dct):
    from ray import tune
    import numpy as np
    import random

    import utils.definition_network as dn

    return {
        "name": "wc_lstm_t1",
        "exp_sets": generate_model(
            dict({
                'function': 'w2vCustom',
                'embedding_type': dn.EmbeddingType.WORD2VEC_CUSTOM,
                'custom_file': 'SMHD-CBOW-A-D-ADUsers-300.bin',
                'use_embedding': dn.UseEmbedding.NON_STATIC,
                'total_registers': config_test_dct['total_registers'],
                'subdirectory': config_test_dct['subdirectory']
            })
        ),
        "batch_size": tune.sample_from(lambda spec: random.choice([20, 10, 25, 40])),
        "epochs": tune.sample_from(lambda spec: np.random.randint(32, 80)),
        "threads": 2,
        "lr": tune.sample_from(lambda spec: np.random.uniform(0.001, 0.01)),
        # "momentum": tune.sample_from(lambda spec: np.random.uniform(0.1, 0.9)),
        "lstm_units": tune.sample_from(lambda spec: np.random.randint(16, 128)),
        "dropout_lstm": tune.sample_from(lambda spec: np.random.uniform(0.2, 0.4)),
        "hidden_layers": tune.sample_from(lambda spec: np.random.randint(3, 5))
    }
def get_variant_spec_image(universe, domain, task, policy, algorithm,
                           *args, **kwargs):
    variant_spec = get_variant_spec_base(
        universe, domain, task, policy, algorithm, *args, **kwargs)

    if is_image_env(domain, task, variant_spec):
        preprocessor_params = {
            'type': 'convnet_preprocessor',
            'kwargs': {
                'conv_filters': (64, ) * 3,
                'conv_kernel_sizes': (3, ) * 3,
                'conv_strides': (2, ) * 3,
                'normalization_type': 'layer',
                'downsampling_type': 'conv',
            },
        }

        variant_spec['policy_params']['kwargs']['hidden_layer_sizes'] = (M, M)
        variant_spec['policy_params']['kwargs']['observation_preprocessors_params'] = {
            'pixels': deepcopy(preprocessor_params)
        }
        # for key in ('hidden_layer_sizes', 'observation_preprocessors_params'):
        variant_spec['Q_params']['kwargs']['hidden_layer_sizes'] = (
            tune.sample_from(lambda spec: (deepcopy(
                spec.get('config', spec)['policy_params']['kwargs']['hidden_layer_sizes']
            )))
        )
        variant_spec['Q_params']['kwargs']['observation_preprocessors_params'] = (
            tune.sample_from(lambda spec: (deepcopy(
                spec.get('config', spec)['policy_params']['kwargs']['observation_preprocessors_params']
            )))
        )

    return variant_spec
def _fill_config_hyperparam(self, config):
    """Fill in the ``config`` dictionary with the hyperparameters.

    Each distribution in ``self.param_distributions`` must implement
    the ``rvs`` method to generate a random variable. The [0] is
    present to extract the single value out of a list, which is
    returned by ``rvs``.

    Args:
        config (:obj:`dict`): dictionary to be filled in as the
            configuration for `tune.run`.
    """
    if (self.search_optimization == "bayesian"
            or isinstance(self.search_optimization, BayesOptSearch)):
        return

    if isinstance(self.param_distributions, list):
        return

    samples = 1
    all_lists = True
    for key, distribution in self.param_distributions.items():
        if isinstance(distribution, list):
            import random

            def get_sample(dist):
                return lambda spec: dist[random.randint(0, len(dist) - 1)]

            config[key] = tune.sample_from(get_sample(distribution))
            samples *= len(distribution)
        else:
            all_lists = False

            def get_sample(dist):
                return lambda spec: dist.rvs(1)[0]

            config[key] = tune.sample_from(get_sample(distribution))

    if all_lists:
        self.num_samples = min(self.num_samples, samples)
def testMemoryCheckpointFree(self):
    class MyTrainable(Trainable):
        def setup(self, config):
            # Make sure this is large enough so ray uses object store
            # instead of in-process store.
            self.large_object = random.getrandbits(int(10e6))
            self.iter = 0
            self.a = config["a"]

        def step(self):
            self.iter += 1
            return {"metric": self.iter + self.a}

        def save_checkpoint(self, checkpoint_dir):
            file_path = os.path.join(checkpoint_dir, "model.mock")
            with open(file_path, "wb") as fp:
                pickle.dump((self.large_object, self.iter, self.a), fp)
            return file_path

        def load_checkpoint(self, path):
            with open(path, "rb") as fp:
                self.large_object, self.iter, self.a = pickle.load(fp)

    class CustomExecutor(RayTrialExecutor):
        def save(self, *args, **kwargs):
            checkpoint = super(CustomExecutor, self).save(*args, **kwargs)
            assert object_memory_usage() <= (12 * 80e6)
            return checkpoint

    param_a = MockParam([1, -1])

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="metric",
        mode="max",
        perturbation_interval=1,
        hyperparam_mutations={"b": [-1]},
    )

    tune.run(
        MyTrainable,
        name="ray_demo",
        scheduler=pbt,
        stop={"training_iteration": 10},
        num_samples=3,
        checkpoint_freq=1,
        fail_fast=True,
        config={"a": tune.sample_from(lambda _: param_a())},
        trial_executor=CustomExecutor(queue_trials=False, reuse_actors=False),
    )
def testTuneSampleAPI(self):
    config = {
        "func": tune.sample_from(lambda spec: spec.config.uniform * 0.01),
        "uniform": tune.uniform(-5, -1),
        "quniform": tune.quniform(3.2, 5.4, 0.2),
        "loguniform": tune.loguniform(1e-4, 1e-2),
        "qloguniform": tune.qloguniform(1e-4, 1e-1, 5e-5),
        "choice": tune.choice([2, 3, 4]),
        "randint": tune.randint(-9, 15),
        "qrandint": tune.qrandint(-21, 12, 3),
        "randn": tune.randn(10, 2),
        "qrandn": tune.qrandn(10, 2, 0.2),
    }
    for _, (_, generated) in zip(range(1000),
                                 generate_variants({"config": config})):
        out = generated["config"]

        self.assertAlmostEqual(out["func"], out["uniform"] * 0.01)

        self.assertGreaterEqual(out["uniform"], -5)
        self.assertLess(out["uniform"], -1)

        self.assertGreaterEqual(out["quniform"], 3.2)
        self.assertLessEqual(out["quniform"], 5.4)
        self.assertAlmostEqual(out["quniform"] / 0.2,
                               round(out["quniform"] / 0.2))

        self.assertGreaterEqual(out["loguniform"], 1e-4)
        self.assertLess(out["loguniform"], 1e-2)

        self.assertGreaterEqual(out["qloguniform"], 1e-4)
        self.assertLessEqual(out["qloguniform"], 1e-1)
        self.assertAlmostEqual(out["qloguniform"] / 5e-5,
                               round(out["qloguniform"] / 5e-5))

        self.assertIn(out["choice"], [2, 3, 4])

        self.assertGreaterEqual(out["randint"], -9)
        self.assertLess(out["randint"], 15)

        self.assertGreaterEqual(out["qrandint"], -21)
        self.assertLessEqual(out["qrandint"], 12)
        self.assertEqual(out["qrandint"] % 3, 0)

        # Very improbable
        self.assertGreater(out["randn"], 0)
        self.assertLess(out["randn"], 20)

        self.assertGreater(out["qrandn"], 0)
        self.assertLess(out["qrandn"], 20)
        self.assertAlmostEqual(out["qrandn"] / 0.2, round(out["qrandn"] / 0.2))
def testRecursiveDep(self):
    try:
        list(
            self.generate_trials({
                "run": "PPO",
                "config": {
                    "foo": tune.sample_from(lambda spec: spec.config.foo),
                },
            }, "recursive_dep"))
    except RecursiveDependencyError as e:
        assert "`foo` recursively depends on" in str(e), e
    else:
        assert False
def _best_guess_spec(envs=None):
    spec = {
        "config": {
            "env_name:embed_path": tune.grid_search(_env_victim(envs)),
            "embed_index": tune.sample_from(
                lambda spec: VICTIM_INDEX[spec.config["env_name:embed_path"][0]]),
            "seed": tune.grid_search(list(range(3))),
        },
    }
    return spec
def run_test_exp(self):
    ahb = AsyncHyperBandScheduler(
        time_attr="training_iteration",
        reward_attr=self.metric,
        grace_period=5,
        max_t=100)

    run(MyTrainableClass,
        name=self.test_name,
        scheduler=ahb,
        local_dir=self.test_dir,
        **{
            "stop": {
                "training_iteration": 1
            },
            "num_samples": 10,
            "config": {
                "width": sample_from(lambda spec: 10 + int(90 * random.random())),
                "height": sample_from(lambda spec: int(100 * random.random())),
            },
        })
def parse_option_space(args):
    sampling_space = {'trainer_path': 'runs'}
    perturb_space = {}
    for opt in args.opt_space:
        key, optstr = opt.split('=')
        space_opt = eval(optstr)
        if isinstance(space_opt, (int, float)):
            sampling_space[key] = space_opt
        elif type(space_opt) is list:
            sampling_space[key] = tune.grid_search(space_opt)
            perturb_space[key] = space_opt
        elif type(space_opt) is str:
            sampling_space[key] = tune.sample_from(
                eval('lambda spec: ' + space_opt))
            perturb_space[key] = eval('lambda: ' + space_opt)
        else:
            sampling_space[key] = tune.sample_from(space_opt)
            perturb_space[key] = space_opt
    return sampling_space, perturb_space
def testQueueFilling(self):
    os.environ["TUNE_MAX_PENDING_TRIALS_PG"] = "1"

    ray.init(num_cpus=4)

    def f1(config):
        for i in range(10):
            yield i

    tune.register_trainable("f1", f1)

    search_alg = BasicVariantGenerator()
    search_alg.add_configurations(
        {
            "foo": {
                "run": "f1",
                "num_samples": 100,
                "config": {
                    "a": tune.sample_from(lambda spec: 5.0 / 7),
                    "b": tune.sample_from(lambda spec: "long" * 40),
                },
                "resources_per_trial": {"cpu": 2},
            }
        }
    )

    runner = TrialRunner(search_alg=search_alg)

    runner.step()
    runner.step()
    runner.step()
    self.assertEqual(len(runner._trials), 3)

    runner.step()
    self.assertEqual(len(runner._trials), 3)

    self.assertEqual(runner._trials[0].status, Trial.RUNNING)
    self.assertEqual(runner._trials[1].status, Trial.RUNNING)
    self.assertEqual(runner._trials[2].status, Trial.PENDING)
def testDependentGridSearchCallable(self):
    class Normal:
        def __call__(self, _config):
            return random.normalvariate(mu=0, sigma=1)

    class Single:
        def __call__(self, _config):
            return 20

    trials = self.generate_trials({
        "run": "PPO",
        "config": {
            "x": grid_search(
                [tune.sample_from(Normal()),
                 tune.sample_from(Normal())]),
            "y": tune.sample_from(Single()),
        },
    }, "dependent_grid_search")
    trials = list(trials)
    self.assertEqual(len(trials), 2)
    self.assertEqual(trials[0].config["y"], 20)
    self.assertEqual(trials[1].config["y"], 20)
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=2):
    config = {
        "l1": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "l2": tune.sample_from(lambda _: 2 ** np.random.randint(2, 9)),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([2, 4, 8, 16])
    }
    scheduler = ASHAScheduler(
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    result = tune.run(
        tune.with_parameters(train_cifar),
        resources_per_trial={"cpu": 2, "gpu": gpus_per_trial},
        config=config,
        metric="loss",
        mode="min",
        num_samples=num_samples,
        scheduler=scheduler
    )

    best_trial = result.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        best_trial.last_result["accuracy"]))

    if ray.util.client.ray.is_connected():
        # If using Ray Client, we want to make sure checkpoint access
        # happens on the server. So we wrap `test_best_model` in a Ray task.
        # We have to make sure it gets executed on the same node that
        # ``tune.run`` is called on.
        from ray.util.ml_utils.node import force_on_current_node
        remote_fn = force_on_current_node(ray.remote(test_best_model))
        ray.get(remote_fn.remote(best_trial))
    else:
        test_best_model(best_trial)
def fft_experiment_temp_annealing(fixed_order, softmax_fn, size, ntrials,
                                  nsteps, result_dir, nthreads, smoke_test):
    assert softmax_fn in ['softmax', 'sparsemax']
    config = {
        'fixed_order': fixed_order,
        'softmax_fn': softmax_fn,
        'size': size,
        'lr': sample_from(lambda spec: math.exp(
            random.uniform(math.log(1e-4), math.log(5e-1)))),
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'n_steps_per_epoch': nsteps,
    }
    if (not fixed_order) and softmax_fn == 'softmax':
        config['semantic_loss_weight'] = sample_from(lambda spec: math.exp(
            random.uniform(math.log(5e-3), math.log(5e-1))))
    experiment = RayExperiment(
        name=f'Fft_factorization_Temp_{fixed_order}_{softmax_fn}_{size}',
        run=TrainableFftTempAnnealing,
        local_dir=result_dir,
        num_samples=ntrials,
        checkpoint_at_end=True,
        resources_per_trial={'cpu': nthreads, 'gpu': 0},
        stop={
            'training_iteration': 1 if smoke_test else 99999,
            'negative_loss': -1e-8
        },
        config=config,
    )
    return experiment
def tune_q_log_uniform(
    high: int,
    low: int = 1,
    q: int = 1,
):
    def func(spec):
        return int(
            max(
                low,
                numpy.round(
                    numpy.random.uniform(numpy.log(low),
                                         numpy.log(high))))) // q * q

    return tune.sample_from(func)
def run_test_exp(self):
    def training_function(config, checkpoint_dir=None):
        tune.report(episode_reward_mean=config["alpha"])

    return tune.run(
        training_function,
        name=self.test_name,
        local_dir=self.test_dir,
        stop={"training_iteration": 1},
        num_samples=self.num_samples,
        config={
            "alpha": tune.sample_from(
                lambda spec: 10 + int(90 * random.random())),
        },
    )
def testPermutationContinuation(self):
    """Tests continuation of runs after permutation.

    Sometimes, runs were continued from deleted checkpoints. This
    deterministic initialisation would fail when the fix was not applied.
    See issues #9036, #9036
    """
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="mean_accuracy",
        mode="max",
        perturbation_interval=1,
        log_config=True,
        hyperparam_mutations={"c": lambda: 1})

    param_a = MockParam([10, 20, 30, 40])
    param_b = MockParam([1.2, 0.9, 1.1, 0.8])

    random.seed(100)
    np.random.seed(1000)
    tune.run(
        MockTrainable,
        config={
            "a": tune.sample_from(lambda _: param_a()),
            "b": tune.sample_from(lambda _: param_b()),
            "c": 1
        },
        fail_fast=True,
        num_samples=4,
        checkpoint_freq=1,
        checkpoint_at_end=True,
        keep_checkpoints_num=1,
        checkpoint_score_attr="min-training_iteration",
        scheduler=scheduler,
        name="testPermutationContinuation",
        stop={"training_iteration": 3})
def get_variant_spec(universe, domain, task, policy):
    variant_spec = {
        'domain': domain,
        'task': task,
        'universe': universe,
        'git_sha': get_git_rev(),

        'env_params': ENV_PARAMS.get(domain, {}).get(task, {}),
        'policy_params': deep_update(
            POLICY_PARAMS_BASE[policy],
            POLICY_PARAMS_FOR_DOMAIN[policy].get(domain, {})
        ),
        'Q_params': {
            'type': 'double_feedforward_Q_function',
            'kwargs': {
                'hidden_layer_sizes': (M, M),
            }
        },
        'algorithm_params': deep_update(
            ALGORITHM_PARAMS_BASE,
            ALGORITHM_PARAMS_PER_DOMAIN.get(domain, {})
        ),
        'replay_pool_params': {
            'type': 'SimpleReplayPool',
            'kwargs': {
                'max_size': 1e6,
            }
        },
        'sampler_params': {
            'type': 'SimpleSampler',
            'kwargs': {
                'max_path_length': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'min_pool_size': MAX_PATH_LENGTH_PER_DOMAIN.get(
                    domain, DEFAULT_MAX_PATH_LENGTH),
                'batch_size': 256,
            }
        },
        'run_params': {
            'seed': tune.sample_from(
                lambda spec: np.random.randint(0, 10000)),
            'checkpoint_at_end': True,
            'checkpoint_frequency': NUM_EPOCHS_PER_DOMAIN.get(
                domain, DEFAULT_NUM_EPOCHS) // NUM_CHECKPOINTS,
            'checkpoint_replay_pool': False,
        },
    }

    return variant_spec
def dense_env_reward_anneal_search(train):
    """Search for the best annealing fraction in SumoHumans."""
    train = dict(train)
    _best_guess_train(train)
    train["total_timesteps"] = int(40e6)
    train["rew_shape"] = True
    train["env_name"] = "multicomp/SumoHumansAutoContact-v0"
    train["embed_path"] = 3  # median difficulty victim (1 is easy, 2 is hard)
    spec = {
        "config": {
            "rew_shape_params": {
                "anneal_frac": tune.sample_from(lambda spec: np.random.rand()),
            },
            "seed": tune.sample_from(lambda spec: np.random.randint(1000)),
        },
        "run_kwargs": {"num_samples": 10},
    }
    exp_name = "dense_env_reward_anneal_search"
    _ = locals()  # quieten flake8 unused variable warning
    del _
def testGetLastCheckpoint(self):
    # one more experiment with 2 iterations
    new_ea = tune.run(
        MyTrainableClass,
        name=self.test_name,
        local_dir=self.test_dir,
        stop={"training_iteration": 2},
        checkpoint_freq=1,
        config={
            "width": tune.sample_from(
                lambda spec: 10 + int(90 * random.random())),
            "height": tune.sample_from(
                lambda spec: int(100 * random.random())),
        },
    )

    # check if it's loaded correctly
    last_checkpoint = new_ea.get_last_checkpoint()._local_path
    assert self.test_path in last_checkpoint
    assert "checkpoint_000002" in last_checkpoint

    # test restoring the checkpoint and running for another iteration
    tune.run(
        MyTrainableClass,
        name=self.test_name,
        local_dir=self.test_dir,
        restore=last_checkpoint,
        stop={"training_iteration": 3},
        checkpoint_freq=1,
        config={
            "width": tune.sample_from(
                lambda spec: 10 + int(90 * random.random())),
            "height": tune.sample_from(
                lambda spec: int(100 * random.random())),
        },
    )
def dense_env_reward_anneal_search(train):
    """Search for the best annealing fraction in SumoHumans."""
    train = dict(train)
    _best_guess_train(train)
    train['total_timesteps'] = int(40e6)
    train['rew_shape'] = True
    train['env_name'] = 'multicomp/SumoHumansAutoContact-v0'
    train['victim_path'] = 3  # median difficulty victim (1 is easy, 2 is hard)
    spec = {
        'config': {
            'rew_shape_params': {
                'anneal_frac': tune.sample_from(
                    lambda spec: np.random.rand()
                ),
            },
            'seed': tune.sample_from(
                lambda spec: np.random.randint(1000)
            ),
        },
        'num_samples': 10,
    }
    exp_name = 'dense_env_reward_anneal_search'
    _ = locals()  # quieten flake8 unused variable warning
    del _
def main(num_samples=10, max_num_epochs=10, gpus_per_trial=3):
    config = {
        "input_dim": 3,
        "steps_ahead": [3, 6, 12],
        "seq_length": tune.sample_from(lambda _: 2**np.random.randint(8, 11)),
        "nhid": tune.sample_from(lambda _: 2**np.random.randint(3, 7)),
        "levels": tune.sample_from(lambda _: 2**np.random.randint(1, 4)),
        "kernel_size": tune.sample_from(lambda _: 2**np.random.randint(1, 5)),
        "dropout": tune.choice([0]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([8, 16, 32, 64, 128])
    }
    scheduler = ASHAScheduler(metric='3',
                              mode="min",
                              max_t=max_num_epochs,
                              grace_period=5,
                              reduction_factor=3)
    reporter = CLIReporter(
        parameter_columns=[
            "seq_length", "nhid", "levels", "kernel_size", "dropout", "lr",
            "batch_size"
        ],
        metric_columns=["3", "6", "12", "training_iteration"])
    result = tune.run(partial(train),
                      resources_per_trial={
                          "cpu": 2,
                          "gpu": gpus_per_trial
                      },
                      config=config,
                      num_samples=num_samples,
                      scheduler=scheduler,
                      progress_reporter=reporter)

    best_trial = result.get_best_trial("3", "min", "last")
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        best_trial.last_result["3"]))
def apply_condition(config, data):
    # TODO: tune.sample_from only supports the random search algorithm.
    # To make conditional parameters work for the other algorithms, different
    # algorithms should take different methods (will be added).
    if args.algorithm == 'random':
        dp_pad_min, dp_pad_max = data[
            'CELL_PAD_IN_SITES_DETAIL_PLACEMENT']['minmax']
        dp_pad_step = data['CELL_PAD_IN_SITES_DETAIL_PLACEMENT']['step']
        if dp_pad_step == 1:
            config['CELL_PAD_IN_SITES_DETAIL_PLACEMENT'] = tune.sample_from(
                lambda spec: tune.randint(
                    dp_pad_min,
                    spec.config.CELL_PAD_IN_SITES_GLOBAL_PLACEMENT + 1))
        if dp_pad_step > 1:
            config['CELL_PAD_IN_SITES_DETAIL_PLACEMENT'] = tune.sample_from(
                lambda spec: tune.choice(
                    np.arange(
                        dp_pad_min,
                        spec.config.CELL_PAD_IN_SITES_GLOBAL_PLACEMENT + 1,
                        dp_pad_step).tolist()))
    return config
def sweep_baseline(num_samples):
    # see here https://github.com/ray-project/ray/issues/7084
    ray.init(webui_host='127.0.0.1',
             object_store_memory=OBJECT_STORE_MINIMUM_MEMORY_BYTES)

    train_set_specs = [esaote_train, philips_train]

    base_config = {'prediction_target': 'Class', 'backend': 'resnet-18',
                   'n_epochs': 15, 'neptune_project': 'createrandom/mus-imageagg',
                   'batch_size': 32}

    for train_set_spec in train_set_specs:
        total_config = {**base_config, **train_set_spec}

        image_sweep_config = {
            "lr": sample_from(lambda x: random.uniform(0.001, 0.1)),
            "backend_mode": sample_from(
                lambda x: random.choice(['finetune', 'scratch']))
        }

        config = {**total_config, **image_sweep_config}
        tune.run(train_image_level, config=config, num_samples=num_samples,
                 resources_per_trial={"gpu": 1, "cpu": 8})

    ray.shutdown()
def cifar10_experiment(dataset, model, args, optimizer, use_hyperband, lr,
                       lr_decay, weight_decay, ntrials, nmaxepochs, batch,
                       resume_pth, result_dir, cuda, smoke_test):
    assert optimizer in ['Adam', 'SGD'], 'Only Adam and SGD are supported'
    if lr_decay is None:
        lr_decay = {'factor': 1.0, 'period': 1000, 'milestones': None}
    config = {
        'optimizer': optimizer,
        'switch_ams': int(0.5 * nmaxepochs) if optimizer == 'Adam' else None,
        'lr': grid_search(lr['grid']) if lr['grid'] is not None else sample_from(
            lambda spec: math.exp(random.uniform(math.log(lr['min']),
                                                 math.log(lr['max'])))),
        # 'lr_decay_factor': 0.2 if lr_decay else 1.0,
        # 'lr_decay_period': lr_decay_period if lr_decay else 10000,
        # 'decay_milestones': decay_milestones,
        'lr_decay': lr_decay,
        'weight_decay': 5e-4 if weight_decay else 0.0,
        'seed': sample_from(lambda spec: random.randint(0, 1 << 16)),
        'device': 'cuda' if cuda else 'cpu',
        'model': {'name': model, 'args': args},
        'dataset': {'name': dataset, 'batch': batch},
    }
    smoke_str = 'smoke_' if smoke_test else ''  # for easy finding and deleting unimportant logs
    args_str = '_'.join([k + ':' + str(v) for k, v in args.items()])
    timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()
    commit_id = subprocess.check_output(
        ['git', 'rev-parse', '--short', 'HEAD']).strip().decode('utf-8')
    experiment = RayExperiment(
        name=f'{smoke_str}{dataset.lower()}_{model}_{args_str}_{optimizer}_epochs_{nmaxepochs}_{timestamp}_{commit_id}',
        run=TrainableModel,
        local_dir=result_dir,
        num_samples=ntrials if not smoke_test else 1,
        checkpoint_at_end=True,
        checkpoint_freq=1000,  # Just to enable recovery with @max_failures
        max_failures=0,
        resources_per_trial={'cpu': 4, 'gpu': 1 if cuda else 0},
        stop={"training_iteration": 1 if smoke_test else nmaxepochs},
        restore=resume_pth,
        config=config,
    )
    return experiment
def get_variant_spec_image(universe, domain, task, policy, algorithm,
                           *args, **kwargs):
    variant_spec = get_variant_spec_base(universe, domain, task, policy,
                                         algorithm, *args, **kwargs)

    if is_image_env(universe, domain, task, variant_spec):
        preprocessor_params = {
            'class_name': 'convnet_preprocessor',
            'config': {
                'conv_filters': (64, ) * 3,
                'conv_kernel_sizes': (3, ) * 3,
                'conv_strides': (2, ) * 3,
                'normalization_type': 'layer',
                'downsampling_type': 'conv',
            },
        }

        variant_spec['policy_params']['config']['hidden_layer_sizes'] = (400, 300)
        variant_spec['policy_params']['config']['preprocessors'] = {
            'pixels': deepcopy(preprocessor_params)
        }
        variant_spec['Q_params']['config']['hidden_layer_sizes'] = (
            tune.sample_from(lambda spec: (deepcopy(
                spec.get('config', spec)['policy_params']['config'][
                    'hidden_layer_sizes']))))
        variant_spec['Q_params']['config']['preprocessors'] = tune.sample_from(
            lambda spec: (
                deepcopy(
                    spec.get('config', spec)['policy_params']['config'][
                        'preprocessors']),
                None,  # Action preprocessor is None
            ))

    return variant_spec
def search_space(self, all_available_features):
    return {
        "selected_features": json.dumps(all_available_features),
        "model": "MTNet",
        "lr": 0.001,
        "batch_size": 16,
        "epochs": 1,
        "cnn_dropout": 0.2,
        "rnn_dropout": 0.2,
        "time_step": tune.choice([3, 4]),
        "cnn_height": 2,
        "long_num": tune.choice([3, 4]),
        "ar_size": tune.choice([2, 3]),
        "past_seq_len": tune.sample_from(
            lambda spec: (spec.config.long_num + 1) * spec.config.time_step),
    }
def hyper(train):
    """A random search to find good hyperparameters in Bansal et al's environments."""
    train = dict(train)
    _sparse_reward(train)
    # Checkpoints take up a lot of disk space, only save every ~500k steps
    train['checkpoint_interval'] = 2 ** 19
    train['total_timesteps'] = int(3e6)
    spec = {
        'config': {
            'env_name': tune.grid_search(
                ['multicomp/KickAndDefend-v0', 'multicomp/SumoHumans-v0']
            ),
            'victim_path': tune.sample_from(
                lambda spec: TARGET_VICTIM[spec.config.env_name]
            ),
            'seed': tune.sample_from(
                lambda spec: np.random.randint(1000)
            ),
            # Dec 2018 experiments used 2^11 = 2048 batch size.
            # Aurick Zhou used 2^14 = 16384; Bansal et al use 409600 ~= 2^19.
            'batch_size': tune.sample_from(
                lambda spec: 2 ** np.random.randint(11, 16)
            ),
            'rl_args': {
                # PPO2 default is 0.01. run_humanoid.py uses 0.00.
                'ent_coef': tune.sample_from(
                    lambda spec: np.random.uniform(low=0.00, high=0.02)
                ),
                # nminibatches must be a factor of batch size; OK provided power of two
                # PPO2 default is 2^2 = 4; run_humanoid.py is 2^5 = 32
                'nminibatches': tune.sample_from(
                    lambda spec: 2 ** (np.random.randint(0, 7))
                ),
                # PPO2 default is 4; run_humanoid.py is 10
                'noptepochs': tune.sample_from(
                    lambda spec: np.random.randint(1, 11),
                ),
            },
            # PPO2 default is 3e-4; run_humanoid uses 1e-4;
            # Bansal et al use 1e-2 (but with huge batch size).
            # Sample log-uniform between 1e-2 and 1e-5.
            'learning_rate': tune.sample_from(
                lambda spec: 10 ** (-2 + -3 * np.random.random())
            ),
        },
        'num_samples': 100,
    }
    exp_name = 'hyper'
    _ = locals()  # quieten flake8 unused variable warning
    del _
        grace_period=20)

    tune.register_trainable(
        "train_mnist",
        lambda cfg, rprtr: train_mnist(args, cfg, rprtr))
    tune.run_experiments(
        {
            "exp": {
                "stop": {
                    "mean_accuracy": 0.99,
                    "timesteps_total": 10 if args.smoke_test else 300
                },
                "run": "train_mnist",
                "num_samples": 1 if args.smoke_test else 10,
                "resources_per_trial": {
                    "cpu": args.threads,
                    "gpu": 0.5 if args.use_gpu else 0
                },
                "config": {
                    "lr": tune.sample_from(
                        lambda spec: np.random.uniform(0.001, 0.1)),
                    "momentum": tune.sample_from(
                        lambda spec: np.random.uniform(0.1, 0.9)),
                    "hidden": tune.sample_from(
                        lambda spec: np.random.randint(32, 512)),
                    "dropout1": tune.sample_from(
                        lambda spec: np.random.uniform(0.2, 0.8)),
                }
            }
        },
        verbose=0,
        scheduler=sched)
    train_spec = {
        "run": Cifar10Model,
        "resources_per_trial": {
            "cpu": 1,
            "gpu": 1
        },
        "stop": {
            "mean_accuracy": 0.80,
            "training_iteration": 30,
        },
        "config": {
            "epochs": 1,
            "batch_size": 64,
            "lr": grid_search([10**-4, 10**-5]),
            "decay": sample_from(lambda spec: spec.config.lr / 100.0),
            "dropout": grid_search([0.25, 0.5]),
        },
        "num_samples": 4,
    }

    if args.smoke_test:
        train_spec["config"]["lr"] = 10**-4
        train_spec["config"]["dropout"] = 0.5

    ray.init()

    pbt = PopulationBasedTraining(
        time_attr="training_iteration",
        reward_attr="mean_accuracy",
        perturbation_interval=10,
# !!! Example of using the ray.tune Python API !!!
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable("my_class", TrainMNIST)
    mnist_spec = {
        'run': 'my_class',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'learning_rate': sample_from(
                lambda spec: 10**np.random.uniform(-5, -3)),
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
        "num_samples": 10,
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 20
        mnist_spec['num_samples'] = 2

    ray.init()
    hyperband = HyperBandScheduler(
        time_attr="training_iteration", reward_attr="mean_accuracy", max_t=10)

    run_experiments({'mnist_hyperband_test': mnist_spec}, scheduler=hyperband)
from ray import tune
from ray.tune.schedulers import HyperBandScheduler

ray.init()
sched = HyperBandScheduler(
    time_attr="training_iteration", reward_attr="neg_mean_loss")
tune.run_experiments(
    {
        "exp": {
            "stop": {
                "mean_accuracy": 0.95,
                "training_iteration": 1 if args.smoke_test else 20,
            },
            "resources_per_trial": {
                "cpu": 3
            },
            "run": TrainMNIST,
            "num_samples": 1 if args.smoke_test else 20,
            "checkpoint_at_end": True,
            "config": {
                "args": args,
                "lr": tune.sample_from(
                    lambda spec: np.random.uniform(0.001, 0.1)),
                "momentum": tune.sample_from(
                    lambda spec: np.random.uniform(0.1, 0.9)),
            }
        }
    },
    verbose=0,
    scheduler=sched)
ray.init()

run_experiments(
    {
        "pbt_humanoid_test": {
            "run": "PPO",
            "env": "Humanoid-v1",
            "num_samples": 8,
            "config": {
                "kl_coeff": 1.0,
                "num_workers": 8,
                "num_gpus": 1,
                "model": {
                    "free_log_std": True
                },
                # These params are tuned from a fixed starting value.
                "lambda": 0.95,
                "clip_param": 0.2,
                "lr": 1e-4,
                # These params start off randomly drawn from a set.
                "num_sgd_iter": sample_from(
                    lambda spec: random.choice([10, 20, 30])),
                "sgd_minibatch_size": sample_from(
                    lambda spec: random.choice([128, 512, 2048])),
                "train_batch_size": sample_from(
                    lambda spec: random.choice([10000, 20000, 40000]))
            },
        },
    },
    scheduler=pbt)
        with open(checkpoint_path) as f:
            self.timestep = json.loads(f.read())["timestep"]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `training_iteration` as the time unit,
    # which is automatically filled by Tune.
    hyperband = HyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        max_t=100)

    exp = Experiment(
        name="hyperband_test",
        run=MyTrainableClass,
        num_samples=20,
        stop={"training_iteration": 1 if args.smoke_test else 99999},
        config={
            "width": sample_from(lambda spec: 10 + int(90 * random.random())),
            "height": sample_from(lambda spec: int(100 * random.random()))
        })

    run_experiments(exp, scheduler=hyperband)