def launch(self):
    """Actual entry point into the class instance where everything happens.

    Delegates heavily to helper methods defined in subclasses or that can be
    overridden.
    """
    self.register_env_creator()

    # All worker nodes will block at this step during training
    ray_cluster_config = self.ray_init_config()
    if not self.is_master_node:
        return

    # Start the driver on the master node
    ray.init(**ray_cluster_config)
    experiment_config = self.get_experiment_config()
    experiment_config = self.customize_experiment_config(experiment_config)
    print("Running experiment with config %s" % json.dumps(experiment_config, indent=2))

    run_experiments(experiment_config)

    all_workers_host_names = self.get_all_host_names()[1:]
    # If this is a distributed job, send TERMINATION_SIGNAL to all workers.
    if len(all_workers_host_names) > 0:
        self.sage_cluster_communicator.create_s3_signal(TERMINATION_SIGNAL)

    algo = experiment_config["training"]["run"]
    env_string = experiment_config["training"]["config"]["env"]
    config = experiment_config["training"]["config"]
    self.save_checkpoint_and_serving_model(
        algorithm=algo, env_string=env_string, config=config)
def run():
    run_experiments(
        {
            "foo": {
                "run": MyResettableClass,
                "max_failures": 1,
                "num_samples": 4,
                "config": {"fake_reset_not_supported": True},
            }
        },
        reuse_actors=True,
        scheduler=FrequentPausesScheduler())
def test_cluster_rllib_restore(start_connected_cluster, tmpdir):
    cluster = start_connected_cluster
    dirpath = str(tmpdir)
    script = """
import time
import ray
from ray import tune

ray.init(redis_address="{redis_address}")

kwargs = dict(
    run="PG",
    env="CartPole-v1",
    stop=dict(training_iteration=10),
    local_dir="{checkpoint_dir}",
    checkpoint_freq=1,
    max_failures=1)

tune.run_experiments(
    dict(experiment=kwargs),
    raise_on_failed_trial=False)
""".format(
        redis_address=cluster.redis_address, checkpoint_dir=dirpath)
    run_string_as_driver_nonblocking(script)

    # Wait until the right checkpoint is saved.
    # The trainable returns every 0.5 seconds, so this should not miss
    # the checkpoint.
    metadata_checkpoint_dir = os.path.join(dirpath, "experiment")
    for i in range(100):
        if TrialRunner.checkpoint_exists(metadata_checkpoint_dir):
            # Inspect the internal trialrunner
            runner = TrialRunner.restore(metadata_checkpoint_dir)
            trials = runner.get_trials()
            last_res = trials[0].last_result
            if last_res and last_res.get("training_iteration"):
                break
        time.sleep(0.3)

    if not TrialRunner.checkpoint_exists(metadata_checkpoint_dir):
        raise RuntimeError("Checkpoint file didn't appear.")

    ray.shutdown()
    cluster.shutdown()
    cluster = _start_new_cluster()
    cluster.wait_for_nodes()

    # Restore properly from checkpoint
    trials2 = tune.run_experiments(
        {
            "experiment": {
                "run": "PG",
                "checkpoint_freq": 1,
                "local_dir": dirpath
            }
        },
        resume=True)
    assert all(t.status == Trial.TERMINATED for t in trials2)
    cluster.shutdown()
def test_ls(start_ray, capsys, tmpdir):
    """This test captures output of list_trials."""
    experiment_name = "test_ls"
    experiment_path = os.path.join(str(tmpdir), experiment_name)
    num_samples = 2
    with capsys.disabled():
        tune.run_experiments({
            experiment_name: {
                "run": "__fake",
                "stop": {"training_iteration": 1},
                "num_samples": num_samples,
                "local_dir": str(tmpdir)
            }
        })

    commands.list_trials(experiment_path, info_keys=("status", ))
    captured = capsys.readouterr().out.strip()
    lines = captured.split("\n")
    assert sum("TERMINATED" in line for line in lines) == num_samples
def test_lsx(start_ray, capsys, tmpdir):
    """This test captures output of list_experiments."""
    project_path = str(tmpdir)
    num_experiments = 3
    for i in range(num_experiments):
        experiment_name = "test_lsx{}".format(i)
        with capsys.disabled():
            tune.run_experiments({
                experiment_name: {
                    "run": "__fake",
                    "stop": {"training_iteration": 1},
                    "num_samples": 1,
                    "local_dir": project_path
                }
            })

    commands.list_experiments(project_path, info_keys=("total_trials", ))
    captured = capsys.readouterr().out.strip()
    lines = captured.split("\n")
    assert sum("1" in line for line in lines) >= 3
def testTrialReuseEnabled(self):
    trials = run_experiments(
        {
            "foo": {
                "run": MyResettableClass,
                "num_samples": 4,
                "config": {},
            }
        },
        reuse_actors=True,
        scheduler=FrequentPausesScheduler())
    self.assertEqual([t.last_result["num_resets"] for t in trials],
                     [1, 2, 3, 4])
def test_cluster_down_full(start_connected_cluster, tmpdir):
    """Tests that run_experiment restoring works on cluster shutdown."""
    cluster = start_connected_cluster
    dirpath = str(tmpdir)

    exp1_args = dict(
        run="__fake",
        stop=dict(training_iteration=3),
        local_dir=dirpath,
        checkpoint_freq=1)
    exp2_args = dict(run="__fake", stop=dict(training_iteration=3))
    exp3_args = dict(
        run="__fake",
        stop=dict(training_iteration=3),
        config=dict(mock_error=True))
    exp4_args = dict(
        run="__fake",
        stop=dict(training_iteration=3),
        config=dict(mock_error=True),
        checkpoint_freq=1)
    all_experiments = {
        "exp1": exp1_args,
        "exp2": exp2_args,
        "exp3": exp3_args,
        "exp4": exp4_args
    }

    tune.run_experiments(all_experiments, raise_on_failed_trial=False)

    ray.shutdown()
    cluster.shutdown()
    cluster = _start_new_cluster()

    trials = tune.run_experiments(
        all_experiments, resume=True, raise_on_failed_trial=False)
    assert len(trials) == 4
    assert all(t.status in [Trial.TERMINATED, Trial.ERROR] for t in trials)
    cluster.shutdown()
cluster = Cluster()
for i in range(num_nodes):
    cluster.add_node(
        redis_port=6379 if i == 0 else None,
        num_redis_shards=num_redis_shards if i == 0 else None,
        num_cpus=20,
        num_gpus=0,
        resources={str(i): 2},
        object_store_memory=object_store_memory,
        redis_max_memory=redis_max_memory)
ray.init(redis_address=cluster.redis_address)

# Run the workload.
run_experiments({
    "apex": {
        "run": "APEX",
        "env": "Pong-v0",
        "config": {
            "num_workers": 8,
            "num_gpus": 0,
            "buffer_size": 10000,
            "learning_starts": 0,
            "sample_batch_size": 1,
            "train_batch_size": 1,
            "min_iter_time_s": 10,
            "timesteps_per_iteration": 10,
        },
    }
})
ray.init()
MultiPendulum = make_multiagent("Pendulum-v0")
register_env("multi_pend", lambda _: MultiPendulum(1))
trials = run_experiments({
    "test": {
        "run": "PPO",
        "env": "multi_pend",
        "stop": {
            "timesteps_total": 500000,
            "episode_reward_mean": -200,
        },
        "config": {
            "train_batch_size": 2048,
            "vf_clip_param": 10.0,
            "num_workers": 0,
            "num_envs_per_worker": 10,
            "lambda": 0.1,
            "gamma": 0.95,
            "lr": 0.0003,
            "sgd_minibatch_size": 64,
            "num_sgd_iter": 10,
            "model": {"fcnet_hiddens": [64, 64]},
            "batch_mode": "complete_episodes",
        },
    }
})
if trials[0].last_result["episode_reward_mean"] < -200:
    raise ValueError("Did not get to -200 reward", trials[0].last_result)
def test_cluster_interrupt(start_connected_cluster, tmpdir):
    """Tests run_experiment on cluster shutdown with actual interrupt.

    This is an end-to-end test.
    """
    cluster = start_connected_cluster
    dirpath = str(tmpdir)

    # Needs to be in scope for pytest
    class _Mock(tune.Trainable):
        """Finishes on the 4th iteration."""

        def setup(self, config):
            self.state = {"hi": 0}

        def step(self):
            self.state["hi"] += 1
            time.sleep(0.5)
            return {"done": self.state["hi"] >= 4}

        def save_checkpoint(self, path):
            return self.state

        def load_checkpoint(self, state):
            self.state = state

    # Removes indent from class.
    reformatted = "\n".join(line[4:] if len(line) else line
                            for line in inspect.getsource(_Mock).split("\n"))

    script = """
import os
import time
import ray
from ray import tune

os.environ["TUNE_GLOBAL_CHECKPOINT_S"] = "0"

ray.init(address="{address}")

{fail_class_code}

tune.run(
    {fail_class},
    name="experiment",
    stop=dict(training_iteration=5),
    local_dir="{checkpoint_dir}",
    checkpoint_freq=1,
    max_failures=1,
    raise_on_failed_trial=False)
""".format(
        address=cluster.address,
        checkpoint_dir=dirpath,
        fail_class_code=reformatted,
        fail_class=_Mock.__name__)
    run_string_as_driver_nonblocking(script)

    # Wait until the right checkpoint is saved.
    # The trainable returns every 0.5 seconds, so this should not miss
    # the checkpoint.
    local_checkpoint_dir = os.path.join(dirpath, "experiment")
    for i in range(50):
        if TrialRunner.checkpoint_exists(local_checkpoint_dir):
            # Inspect the internal trialrunner
            runner = TrialRunner(
                resume="LOCAL", local_checkpoint_dir=local_checkpoint_dir)
            trials = runner.get_trials()
            last_res = trials[0].last_result
            if last_res and last_res.get("training_iteration") == 3:
                break
        time.sleep(0.2)

    if not TrialRunner.checkpoint_exists(local_checkpoint_dir):
        raise RuntimeError("Checkpoint file didn't appear.")

    ray.shutdown()
    cluster.shutdown()
    cluster = _start_new_cluster()
    Experiment.register_if_needed(_Mock)

    # Inspect the internal trialrunner
    runner = TrialRunner(
        resume="LOCAL", local_checkpoint_dir=local_checkpoint_dir)
    trials = runner.get_trials()
    assert trials[0].last_result["training_iteration"] == 3
    assert trials[0].status == Trial.PENDING

    # Restore properly from checkpoint
    trials2 = tune.run_experiments(
        {
            "experiment": {
                "run": _Mock,
                "local_dir": dirpath,
                "checkpoint_freq": 1
            }
        },
        resume=True,
        raise_on_failed_trial=False)
    assert all(t.status == Trial.TERMINATED for t in trials2)
    assert {t.trial_id for t in trials2} == {t.trial_id for t in trials}
    ray.shutdown()
    cluster.shutdown()
register_env("SinglePairTrading-v0",
             lambda config: SinglePairTradingEnv(config))
ray.init()
run_experiments({
    "clipparam_{}_{}_{}_wallet-{}-{}".format(
        FLAGS.symbol + FLAGS.to_symbol, FLAGS.limit, FLAGS.histo,
        WALLET_FIRST_SYMBOL, WALLET_SECOND_SYMBOL): {
        "run": FLAGS.algo,
        "env": "SinglePairTrading-v0",
        "stop": {
            "timesteps_total": 1e6,  # 1e6 = 1M
        },
        "checkpoint_freq": 100,
        "checkpoint_at_end": True,
        "config": {
            "lr": grid_search([
                1e-4  # 1e-6
            ]),
            "num_workers": 3,  # parallelism
            'observation_filter': 'MeanStdFilter',
            "vf_clip_param": 10000000.0,
            "env_config": {
                'keys': keys,
                'symbols': symbols,
                'first_coin': FLAGS.symbol,
                'second_coin': FLAGS.to_symbol
            },
        }
    }
})
if args.torch:
    deprecation_warning(old="--torch", new="--framework=torch")
    exp["config"]["framework"] = "torch"
    args.framework = "torch"

# Print out the actual config.
print("== Test config ==")
print(yaml.dump(experiments))

# Try running each test 3 times and make sure it reaches the given
# reward.
passed = False
for i in range(3):
    try:
        ray.init(num_cpus=5, local_mode=args.local_mode)
        trials = run_experiments(experiments, resume=False, verbose=2)
    finally:
        ray.shutdown()
        _register_all()

    for t in trials:
        if (t.last_result["episode_reward_mean"] >=
                t.stopping_criterion["episode_reward_mean"]):
            passed = True
            break

    if passed:
        print("Regression test PASSED")
        break
    else:
        print("Regression test FAILED on attempt {}".format(i + 1))
parser = argparse.ArgumentParser()
parser.add_argument(
    '--smoke-test', action='store_true', help='Finish quickly for testing')
args, _ = parser.parse_known_args()

register_trainable("my_class", TrainMNIST)
mnist_spec = {
    'run': 'my_class',
    'stop': {
        'mean_accuracy': 0.99,
        'time_total_s': 600,
    },
    'config': {
        'learning_rate': lambda spec: 10**np.random.uniform(-5, -3),
        'activation': grid_search(['relu', 'elu', 'tanh']),
    },
    "repeat": 10,
}

if args.smoke_test:
    mnist_spec['stop']['training_iteration'] = 2
    mnist_spec['repeat'] = 2

ray.init()
hyperband = HyperBandScheduler(
    time_attr="timesteps_total", reward_attr="mean_accuracy", max_t=100)

run_experiments({'mnist_hyperband_test': mnist_spec}, scheduler=hyperband)
            last_layer, training=input_dict["is_training"])
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return output, last_layer


if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()
    ModelCatalog.register_custom_model("bn_model", BatchNormModel)
    run_experiments({
        "batch_norm_demo": {
            "run": args.run,
            "env": "Pendulum-v0" if args.run == "DDPG" else "CartPole-v0",
            "stop": {"training_iteration": args.num_iters},
            "config": {
                "model": {"custom_model": "bn_model"},
                "num_workers": 0,
            },
        },
    })
def checkAndReturnConsistentLogs(self, results, sleep_per_iter=None):
    """Checks that logging is the same between APIs.

    Ignores "DONE" for logging, but checks that the scheduler is notified
    properly with the last result.
    """
    class_results = copy.deepcopy(results)
    function_results = copy.deepcopy(results)

    class_output = []
    function_output = []
    scheduler_notif = []

    class MockScheduler(FIFOScheduler):
        def on_trial_complete(self, runner, trial, result):
            scheduler_notif.append(result)

    class ClassAPILogger(Logger):
        def on_result(self, result):
            class_output.append(result)

    class FunctionAPILogger(Logger):
        def on_result(self, result):
            function_output.append(result)

    class _WrappedTrainable(Trainable):
        def _setup(self, config):
            del config
            self._result_iter = copy.deepcopy(class_results)

        def _train(self):
            if sleep_per_iter:
                time.sleep(sleep_per_iter)
            res = self._result_iter.pop(0)  # This should not fail
            if not self._result_iter:  # Mark "Done" for last result
                res[DONE] = True
            return res

    def _function_trainable(config, reporter):
        for result in function_results:
            if sleep_per_iter:
                time.sleep(sleep_per_iter)
            reporter(**result)

    class_trainable_name = "class_trainable"
    register_trainable(class_trainable_name, _WrappedTrainable)

    trials = run_experiments(
        {
            "function_api": {
                "run": _function_trainable,
                "loggers": [FunctionAPILogger],
            },
            "class_api": {
                "run": class_trainable_name,
                "loggers": [ClassAPILogger],
            },
        },
        raise_on_failed_trial=False,
        scheduler=MockScheduler())

    # Ignore these fields
    NO_COMPARE_FIELDS = {
        HOSTNAME,
        NODE_IP,
        TRIAL_ID,
        EXPERIMENT_TAG,
        PID,
        TIME_THIS_ITER_S,
        TIME_TOTAL_S,
        DONE,  # This is ignored because FunctionAPI has different handling
        "timestamp",
        "time_since_restore",
        "experiment_id",
        "date",
    }

    self.assertEqual(len(class_output), len(results))
    self.assertEqual(len(function_output), len(results))

    def as_comparable_result(result):
        return {
            k: v
            for k, v in result.items() if k not in NO_COMPARE_FIELDS
        }

    function_comparable = [
        as_comparable_result(result) for result in function_output
    ]
    class_comparable = [
        as_comparable_result(result) for result in class_output
    ]

    self.assertEqual(function_comparable, class_comparable)

    self.assertEqual(sum(t.get(DONE) for t in scheduler_notif), 2)
    self.assertEqual(
        as_comparable_result(scheduler_notif[0]),
        as_comparable_result(scheduler_notif[1]))

    # Make sure the last result is the same.
    self.assertEqual(
        as_comparable_result(trials[0].last_result),
        as_comparable_result(trials[1].last_result))

    return function_output, trials
def f():
    run_experiments({"foo": {"run": "PPO", "stop": {"asdf": 1}}})
def f():
    run_experiments({"foo": {
        "run": "asdf",
    }})
def f():
    run_experiments(
        {"foo": {
            "run": grid_search("invalid grid search"),
        }})
def f():
    run_experiments(
        {"foo": {
            "run": "asdf",
            "bah": "this param is not allowed",
        }})
def f():
    run_experiments({"foo": {}})
object_store_memory = 10**9
num_nodes = 3

message = ("Make sure there is enough memory on this machine to run this "
           "workload. We divide the system memory by 2 to provide a buffer.")
assert (num_nodes * object_store_memory + num_redis_shards * redis_max_memory <
        ray._private.utils.get_system_memory() / 2), message

# Simulate a cluster on one machine.
ray.init(address="auto")

# Run the workload.
run_experiments({
    "ppo": {
        "run": "PPO",
        "env": "CartPole-v0",
        "num_samples": 10000,
        "config": {
            "framework": "torch",
            "num_workers": 7,
            "num_gpus": 0,
            "num_sgd_iter": 1,
        },
        "stop": {"timesteps_total": 1},
    }
})
    config = {
        "num_gpus": 0,
        "num_workers": 2,
        "optimizer": {
            "num_replay_buffer_shards": 1,
        },
        "min_iter_time_s": 3,
        "buffer_size": 1000,
        "learning_starts": 1000,
        "train_batch_size": 128,
        "sample_batch_size": 32,
        "target_network_update_freq": 500,
        "timesteps_per_iteration": 1000,
    }
    group = True
else:
    config = {}
    group = False

ray.init()
run_experiments({
    "two_step": {
        "run": args.run,
        "env": "grouped_twostep" if group else TwoStepGame,
        "stop": {
            "timesteps_total": args.stop,
        },
        "config": config,
    },
})
run_experiments({
    "carla-dqn": {
        "run": "DQN",
        "env": "carla_env",
        "resources": {"cpu": 4, "gpu": 1},
        "config": {
            "env_config": env_config,
            "model": {
                "custom_model": "carla",
                "custom_options": {
                    "image_shape": [
                        80,
                        80,
                        lambda spec: spec.config.env_config.framestack * (
                            spec.config.env_config.use_depth_camera and 1 or 3),
                    ],
                },
                "conv_filters": [
                    [16, [8, 8], 4],
                    [32, [4, 4], 2],
                    [512, [10, 10], 1],
                ],
            },
            "timesteps_per_iteration": 100,
            "learning_starts": 1000,
            "schedule_max_timesteps": 100000,
            "gamma": 0.8,
            "tf_session_args": {
                "gpu_options": {"allow_growth": True},
            },
        },
    },
})
def f():
    run_experiments({"foo": {
        "run": "f1",
    }})
sched = AsyncHyperBandScheduler(
    time_attr="training_iteration",
    reward_attr="neg_mean_loss",
    max_t=400,
    grace_period=20)

tune.register_trainable("train_mnist",
                        lambda cfg, rprtr: train_mnist(args, cfg, rprtr))
tune.run_experiments(
    {
        "exp": {
            "stop": {
                "mean_accuracy": 0.98,
                "training_iteration": 1 if args.smoke_test else 20
            },
            "resources_per_trial": {
                "cpu": 3,
                "gpu": int(not args.no_cuda)
            },
            "run": "train_mnist",
            "num_samples": 1 if args.smoke_test else 10,
            "config": {
                "lr": tune.sample_from(
                    lambda spec: np.random.uniform(0.001, 0.1)),
                "momentum": tune.sample_from(
                    lambda spec: np.random.uniform(0.1, 0.9)),
            }
        }
    },
    verbose=0,
    scheduler=sched)
    def custom_stats(self):
        return {
            "policy_loss": self.policy_loss,
            "imitation_loss": self.imitation_loss,
        }


if __name__ == "__main__":
    ray.init()
    args = parser.parse_args()

    ModelCatalog.register_custom_model("custom_loss", CustomLossModel)
    run_experiments({
        "custom_loss": {
            "run": "PG",
            "env": "CartPole-v0",
            "stop": {"training_iteration": args.iters},
            "config": {
                "num_workers": 0,
                "model": {
                    "custom_model": "custom_loss",
                    "custom_options": {
                        "input_files": args.input_files,
                    },
                },
            },
        },
    })
create_env, env_name = make_create_env(params=flow_params, version=0)
env_name = env_name + '_[FLOW_RATE, FLOW_RATE_MERGE, RL_PENETRATION]:[{},{},{:.3f}]'.format(
    FLOW_RATE, FLOW_RATE_MERGE, RL_PENETRATION)

# Register as rllib env
register_env(env_name, create_env)

checkpoint_path = base + '/checkpoint_{}/checkpoint-{}'.format(
    checkpoint, checkpoint)
config_path = base + '/params.pkl'
with open(config_path, mode='rb') as f:
    config = pickle.load(f)

exp_tag = {
    "run": 'PPO',
    "env": env_name,
    "config": {
        **config
    },
    "checkpoint_freq": 25,
    "checkpoint_at_end": True,
    "max_failures": 999,
    "stop": {"training_iteration": checkpoint + 50},
    "restore": checkpoint_path,
    "num_samples": 1,
}
exp_list.append(Experiment.from_json(args.exp_tag, exp_tag))

trials = run_experiments(experiments=exp_list)
register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

run_experiments({
    "carla-a3c": {
        "run": "A3C",
        "env": "carla_env",
        "resources": {"cpu": 4, "gpu": 1},
        "config": {
            "env_config": env_config,
            "model": {
                "custom_model": "carla",
                "custom_options": {
                    "image_shape": [80, 80, 6],
                },
                "conv_filters": [
                    [16, [8, 8], 4],
                    [32, [4, 4], 2],
                    [512, [10, 10], 1],
                ],
            },
            "gamma": 0.8,
            "num_workers": 1,
        },
    },
})
    for i in range(args.num_iters):
        if i % 10 == 0:
            start = time.time()
            loss = sgd.step(fetch_stats=True)["loss"]
            metrics = sgd.foreach_model(lambda model: model.get_metrics())
            acc = [m["accuracy"] for m in metrics]
            print("Iter", i, "loss", loss, "accuracy", acc)
            print("Time per iteration", time.time() - start)
            assert len(set(acc)) == 1, ("Models out of sync", acc)
            reporter(timesteps_total=i, mean_loss=loss, mean_accuracy=acc[0])
        else:
            sgd.step()


if __name__ == "__main__":
    args = parser.parse_args()
    ray.init(redis_address=args.redis_address)

    if args.tune:
        run_experiments({
            "mnist_sgd": {
                "run": train_mnist,
                "config": {"args": args},
            },
        })
    else:
        train_mnist({"args": args}, lambda **kw: None)
ray.init(redis_address=redis_address)
run_experiments({
    "carla-a3c": {
        "run": "A3C",
        "env": "carla_env",
        "resources": {"cpu": 5, "gpu": 2, "driver_gpu_limit": 0},
        "config": {
            "env_config": env_config,
            "use_gpu_for_workers": True,
            "model": {
                "custom_model": "carla",
                "custom_options": {
                    "image_shape": [80, 80, 6],
                },
                "conv_filters": [
                    [16, [8, 8], 4],
                    [32, [4, 4], 2],
                    [512, [10, 10], 1],
                ],
            },
            "gamma": 0.95,
            "num_workers": 2,
        },
    },
})
    default=2,
    help="How many gpus do you want ? Because we own too many")

args = parser.parse_args()
set_seed(args.seed)
ray.init(object_id_seed=args.seed, num_gpus=args.n_gpu, num_cpus=args.n_cpu)

full_config = load_config(env_config_file=args.env_config,
                          model_config_file=args.model_config,
                          env_ext_file=args.env_ext,
                          model_ext_file=args.model_ext)

if args.pbt_config:
    assert args.grid_config is None, \
        "Cannot set Population based training and grid search, choose wisely"
    full_config, pbt_scheduler = prepare_pbt_config(full_config, args.pbt_config)
else:
    pbt_scheduler = None

if args.grid_config:
    full_config = grid_search_overriding(full_config, args.grid_config)

experiment = create_expe_spec(full_config,
                              n_cpu=args.n_cpu,
                              n_gpu=args.n_gpu,
                              exp_dir=args.exp_dir)

tune.run_experiments(experiments=experiment,
                     scheduler=pbt_scheduler,
                     queue_trials=True)
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config


if __name__ == "__main__":
    alg_run, gym_name, config = setup_exps()
    ray.init(num_cpus=N_CPUS + 1, object_store_memory=2048 * 1024 * 1024)
    trials = run_experiments({
        flow_params["exp_tag"]: {
            "run": alg_run,
            "env": gym_name,
            "config": {
                **config
            },
            "checkpoint_freq": 1,
            "checkpoint_at_end": True,
            "max_failures": 999,
            "stop": {"training_iteration": 1},
        }
    })
args, _ = parser.parse_known_args()

mnist_spec = {
    'run': train,
    'num_samples': 10,
    'stop': {
        'mean_accuracy': 0.99,
        'timesteps_total': 600,
    },
    'config': {
        'activation': grid_search(['relu', 'elu', 'tanh']),
    },
}

if args.smoke_test:
    mnist_spec['stop']['training_iteration'] = 2
    mnist_spec['num_samples'] = 1

ray.init()

from ray.tune.schedulers import AsyncHyperBandScheduler
run_experiments(
    {'tune_mnist_test': mnist_spec},
    scheduler=AsyncHyperBandScheduler(
        time_attr="timesteps_total",
        reward_attr="mean_accuracy",
        max_t=600,
    ))
    config.update({
        'multiagent': {
            'policies': policy_graphs,
            'policy_mapping_fn': tune.function(policy_mapping_fn),
            'policies_to_train': ['av']
        }
    })

    return alg_run, env_name, config


# RUN EXPERIMENT
if __name__ == '__main__':
    alg_run, env_name, config = setup_exps(flow_params)
    ray.init(num_cpus=N_CPUS + 1)

    run_experiments({
        flow_params['exp_tag']: {
            'run': alg_run,
            'env': env_name,
            'checkpoint_freq': 20,
            'checkpoint_at_end': True,
            'stop': {
                'training_iteration': N_TRAINING_ITERATIONS
            },
            'config': config,
        },
    })
    result = subprocess.check_output(
        "ps aux | grep '{}' | grep -v grep || true".format(UNIQUE_CMD),
        shell=True)
    return result


if __name__ == "__main__":
    register_env("subproc", lambda config: EnvWithSubprocess(config))
    ray.init()
    assert os.path.exists(UNIQUE_FILE_0)
    assert os.path.exists(UNIQUE_FILE_1)
    assert not leaked_processes()
    run_experiments({
        "demo": {
            "run": "PG",
            "env": "subproc",
            "num_samples": 1,
            "config": {"num_workers": 1},
            "stop": {"training_iteration": 1},
        },
    })
    leaked = leaked_processes()
    assert not leaked, "LEAKED PROCESSES: {}".format(leaked)
    assert not os.path.exists(UNIQUE_FILE_0), "atexit handler not called"
    assert not os.path.exists(UNIQUE_FILE_1), "atexit handler not called"
    print("OK")
import ray
from ray.tune import run_experiments

if __name__ == '__main__':
    ray.init()
    for test in sys.argv[1:]:
        experiments = yaml.load(open(test).read())

        print("== Test config ==")
        print(yaml.dump(experiments))

        for i in range(3):
            trials = run_experiments(experiments)

            num_failures = 0
            for t in trials:
                if (t.last_result["episode_reward_mean"] <
                        t.stopping_criterion["episode_reward_mean"]):
                    num_failures += 1

            if not num_failures:
                print("Regression test PASSED")
                sys.exit(0)

            print("Regression test flaked, retry", i)

        print("Regression test FAILED")
        sys.exit(1)
    def step(self, action):
        assert action in [0, 1], action
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1
        done = self.cur_pos >= self.end_pos
        return [self.cur_pos], 1 if done else 0, done, {}


if __name__ == "__main__":
    # Can also register the env creator function explicitly with:
    # register_env("corridor", lambda config: SimpleCorridor(config))
    ray.init()
    run_experiments({
        "demo": {
            "run": "PPO",
            "env": SimpleCorridor,  # or "corridor" if registered above
            "stop": {"timesteps_total": 10000},
            "config": {
                "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
                "num_workers": 1,  # parallelism
                "env_config": {"corridor_length": 5},
            },
        },
    })
        cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
    register_env(gym_name, create_env)
    return alg_run, gym_name, config


if __name__ == '__main__':
    alg_run, gym_name, config = setup_exps()
    ray.init(num_cpus=N_CPUS + 1, redirect_output=False)

    trials = run_experiments({
        flow_params['exp_tag']: {
            'run': alg_run,
            'env': gym_name,
            'config': {
                **config
            },
            'checkpoint_freq': 20,
            'max_failures': 999,
            'stop': {
                'training_iteration': 200,
            },
        }
    })
        num_cpus=10,
        num_gpus=0,
        resources={str(i): 2},
        object_store_memory=object_store_memory,
        redis_max_memory=redis_max_memory)
ray.init(redis_address=cluster.redis_address)

# Run the workload.
pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    reward_attr="episode_reward_mean",
    perturbation_interval=10,
    hyperparam_mutations={
        "lr": [0.1, 0.01, 0.001, 0.0001],
    })

run_experiments(
    {
        "pbt_test": {
            "run": "PG",
            "env": "CartPole-v0",
            "num_samples": 8,
            "config": {"lr": 0.01},
        }
    },
    scheduler=pbt,
    verbose=False)
    action='store_true',
    help='Finish quickly for testing')
args, _ = parser.parse_known_args()

register_trainable("my_class", TrainMNIST)
mnist_spec = {
    'run': 'my_class',
    'stop': {
        'mean_accuracy': 0.99,
        'time_total_s': 600,
    },
    'config': {
        'learning_rate': sample_from(
            lambda spec: 10**np.random.uniform(-5, -3)),
        'activation': grid_search(['relu', 'elu', 'tanh']),
    },
    "num_samples": 10,
}

if args.smoke_test:
    mnist_spec['stop']['training_iteration'] = 20
    mnist_spec['num_samples'] = 2

ray.init()
hyperband = HyperBandScheduler(
    time_attr="training_iteration", reward_attr="mean_accuracy", max_t=10)

run_experiments({'mnist_hyperband_test': mnist_spec}, scheduler=hyperband)
pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    reward_attr="episode_reward_mean",
    perturbation_interval=10,
    hyperparam_mutations={
        # Allow for scaling-based perturbations, with a uniform backing
        # distribution for resampling.
        "factor_1": lambda: random.uniform(0.0, 20.0),
        # Allow perturbations within this set of categorical values.
        "factor_2": [1, 2],
    })

# Try to find the best factor 1 and factor 2
run_experiments(
    {
        "pbt_test": {
            "run": MyTrainableClass,
            "stop": {
                "training_iteration": 20 if args.smoke_test else 99999
            },
            "num_samples": 10,
            "config": {
                "factor_1": 4.0,
                "factor_2": 1.0,
            },
        }
    },
    scheduler=pbt,
    verbose=False)
config['eval_prob'] = 0.05
config['observation_filter'] = "NoFilter"

# save the flow params for replay
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Register as rllib env
register_env(env_name, create_env)

trials = run_experiments({
    flow_params["exp_tag"]: {
        "run": alg_run,
        "env": env_name,
        "config": {
            **config
        },
        "checkpoint_freq": 25,
        "max_failures": 999,
        "stop": {"training_iteration": 500},
        "num_samples": 1,
        "upload_dir": "s3://<BUCKET NAME>"
    },
})
"scenarios": [LANE_KEEP], }) register_env(env_name, lambda env_config: CarlaEnv(env_config)) register_carla_model() ray.init() run_experiments({ "carla-a3c": { "run": "A3C", "env": "carla_env", "resources": {"cpu": 4, "gpu": 1}, "config": { "env_config": env_config, "model": { "custom_model": "carla", "custom_options": { "image_shape": [80, 80, 6], }, "conv_filters": [ [16, [8, 8], 4], [32, [4, 4], 2], [512, [10, 10], 1], ], }, "gamma": 0.8, "num_workers": 1, }, }, })
from ray import tune
from ray.tune.schedulers import HyperBandScheduler

ray.init()
sched = HyperBandScheduler(
    time_attr="training_iteration", reward_attr="neg_mean_loss")
tune.run_experiments(
    {
        "exp": {
            "stop": {
                "mean_accuracy": 0.95,
                "training_iteration": 1 if args.smoke_test else 20,
            },
            "trial_resources": {"cpu": 3},
            "run": TrainMNIST,
            "num_samples": 1 if args.smoke_test else 20,
            "checkpoint_at_end": True,
            "config": {
                "args": args,
                "lr": tune.sample_from(
                    lambda spec: np.random.uniform(0.001, 0.1)),
                "momentum": tune.sample_from(
                    lambda spec: np.random.uniform(0.1, 0.9)),
            }
        }
    },
    verbose=0,
    scheduler=sched)
# Simulate a cluster on one machine.
cluster = Cluster()
for i in range(num_nodes):
    cluster.add_node(
        redis_port=6379 if i == 0 else None,
        num_redis_shards=num_redis_shards if i == 0 else None,
        num_cpus=10,
        num_gpus=0,
        resources={str(i): 2},
        object_store_memory=object_store_memory,
        redis_max_memory=redis_max_memory)
ray.init(redis_address=cluster.redis_address)

# Run the workload.
run_experiments({
    "impala": {
        "run": "IMPALA",
        "env": "CartPole-v0",
        "config": {
            "num_workers": 8,
            "num_gpus": 0,
            "num_envs_per_worker": 5,
            "remote_worker_envs": True,
            "sample_batch_size": 50,
            "train_batch_size": 100,
        },
    },
})
        with open(checkpoint_path) as f:
            self.timestep = json.loads(f.read())["timestep"]


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    # Hyperband early stopping, configured with `episode_reward_mean` as the
    # objective and `training_iteration` as the time unit,
    # which is automatically filled by Tune.
    hyperband = HyperBandScheduler(
        time_attr="training_iteration",
        reward_attr="episode_reward_mean",
        max_t=100)

    exp = Experiment(
        name="hyperband_test",
        run=MyTrainableClass,
        num_samples=20,
        stop={"training_iteration": 1 if args.smoke_test else 99999},
        config={
            "width": sample_from(lambda spec: 10 + int(90 * random.random())),
            "height": sample_from(lambda spec: int(100 * random.random()))
        })

    run_experiments(exp, scheduler=hyperband)
    }, {
        'name': 'height',
        'type': 'int',
        'bounds': {
            'min': -100,
            'max': 100
        },
    },
]

config = {
    "my_exp": {
        "run": "exp",
        "num_samples": 10 if args.smoke_test else 1000,
        "config": {
            "iterations": 100,
        },
        "stop": {
            "timesteps_total": 100
        },
    }
}

algo = SigOptSearch(
    space,
    name="SigOpt Example Experiment",
    max_concurrent=1,
    reward_attr="neg_mean_loss")
scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
run_experiments(config, search_alg=algo, scheduler=scheduler)
# for i in range(num_nodes):
#     cluster.add_node(redis_port=6379 if i == 0 else None,
#                      num_redis_shards=num_redis_shards if i == 0 else None,
#                      num_cpus=20,
#                      num_gpus=0,
#                      resources={str(i): 2},
#                      object_store_memory=object_store_memory,
#                      redis_max_memory=redis_max_memory,
#                      dashboard_host="0.0.0.0")
# ray.init(address=cluster.address)
ray.init()

# Run the workload.
run_experiments({
    "apex": {
        "run": "APEX",
        "env": "Pong-v0",
        "config": {
            "num_workers": 3,
            "num_gpus": 0,
            "buffer_size": 10000,
            "learning_starts": 0,
            "rollout_fragment_length": 1,
            "train_batch_size": 1,
            "min_iter_time_s": 10,
            "timesteps_per_iteration": 10,
        },
    }
})
#!/usr/bin/env python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

import ray
from ray.tune import register_trainable, run_experiments


def f(config, reporter):
    reporter(timesteps_total=1)


if __name__ == "__main__":
    ray.init()
    register_trainable("my_class", f)
    run_experiments({
        "test": {
            "run": "my_class",
            "stop": {"training_iteration": 1}
        }
    })
    assert 'ray.rllib' not in sys.modules, "RLlib should not be imported"
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-iters", type=int, default=2000)
    args = parser.parse_args()

    ray.init()
    trials = tune.run_experiments({
        "test": {
            "env": "CartPole-v0",
            "run": "PG",
            "stop": {"training_iteration": args.num_iters},
            "config": {
                "callbacks": {
                    "on_episode_start": tune.function(on_episode_start),
                    "on_episode_step": tune.function(on_episode_step),
                    "on_episode_end": tune.function(on_episode_end),
                    "on_sample_end": tune.function(on_sample_end),
                    "on_train_result": tune.function(on_train_result),
                },
            },
        }
    })

    # verify custom metrics for integration tests
    custom_metrics = trials[0].last_result["custom_metrics"]
    print(custom_metrics)
    assert "pole_angle_mean" in custom_metrics
    assert "pole_angle_min" in custom_metrics
    assert "pole_angle_max" in custom_metrics
# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json

create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env
register_env(env_name, create_env)

trials = run_experiments({
    flow_params["exp_tag"]: {
        "run": "PPO",
        "env": env_name,
        "config": {
            **config
        },
        "checkpoint_freq": 5,
        "max_failures": 999,
        "stop": {"training_iteration": 200},
        "repeat": 3,
        "trial_resources": {
            "cpu": 1,
            "gpu": 0,
            "extra_cpu": PARALLEL_ROLLOUTS - 1,
        },
    },
})
    }
elif args.run == "APEX_QMIX":
    config = {
        "num_gpus": 0,
        "num_workers": 2,
        "optimizer": {
            "num_replay_buffer_shards": 1,
        },
        "min_iter_time_s": 3,
        "buffer_size": 1000,
        "learning_starts": 1000,
        "train_batch_size": 128,
        "sample_batch_size": 32,
        "target_network_update_freq": 500,
        "timesteps_per_iteration": 1000,
    }
else:
    config = {}

ray.init()
run_experiments({
    "two_step": {
        "run": args.run,
        "env": "grouped_twostep",
        "stop": {
            "timesteps_total": args.stop,
        },
        "config": config,
    },
})
register_trainable("my_class", MyTrainableClass)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--smoke-test", action="store_true", help="Finish quickly for testing")
    args, _ = parser.parse_known_args()
    ray.init()

    # asynchronous hyperband early stopping, configured with
    # `episode_reward_mean` as the objective and
    # `timesteps_total` as the time unit.
    ahb = AsyncHyperBandScheduler(
        time_attr="timesteps_total",
        reward_attr="episode_reward_mean",
        grace_period=5,
        max_t=100)

    run_experiments(
        {
            "asynchyperband_test": {
                "run": "my_class",
                "stop": {"training_iteration": 1 if args.smoke_test else 99999},
                "repeat": 20,
                "resources": {"cpu": 1, "gpu": 0},
                "config": {
                    "width": lambda spec: 10 + int(90 * random.random()),
                    "height": lambda spec: int(100 * random.random()),
                },
            }
        },
        scheduler=ahb)
# generating a string version of flow_params
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
# adding the flow_params to config dict
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym
register_env(gym_name, create_env)

trials = run_experiments({
    flow_params["exp_tag"]: {
        "run": alg_run,
        "env": gym_name,
        "config": {
            **config
        },
        "checkpoint_freq": 5,  # number of iterations between checkpoints
        "checkpoint_at_end": True,  # generate a checkpoint at the end
        "max_failures": 999,
        "stop": {  # stopping conditions
            "training_iteration": 200,  # number of iterations to stop after
        },
    },
})
parser = argparse.ArgumentParser()
parser.add_argument(
    "--smoke-test", action="store_true", help="Finish quickly for testing")
args, _ = parser.parse_known_args()
ray.init()

pbt = PopulationBasedTraining(
    time_attr="training_iteration",
    reward_attr="episode_reward_mean",
    perturbation_interval=10,
    hyperparam_mutations={
        # Allow for scaling-based perturbations, with a uniform backing
        # distribution for resampling.
        "factor_1": lambda: random.uniform(0.0, 20.0),
        # Allow perturbations within this set of categorical values.
        "factor_2": [1, 2],
    })

# Try to find the best factor 1 and factor 2
run_experiments(
    {
        "pbt_test": {
            "run": "my_class",
            "stop": {"training_iteration": 2 if args.smoke_test else 99999},
            "repeat": 10,
            "resources": {"cpu": 1, "gpu": 0},
            "config": {
                "factor_1": 4.0,
                "factor_2": 1.0,
            },
        }
    },
    scheduler=pbt,
    verbose=False)
#!/usr/bin/env python

import sys

import ray
from ray.tune import register_trainable, run_experiments


def f(config, reporter):
    reporter(timesteps_total=1)


if __name__ == "__main__":
    ray.init(num_cpus=2)
    register_trainable("my_class", f)
    run_experiments({
        "test": {
            "run": "my_class",
            "stop": {"training_iteration": 1}
        }
    })
    assert "ray.rllib" not in sys.modules, "RLlib should not be imported"
    assert "mlflow" not in sys.modules, "MLflow should not be imported"
from ray import tune
from ray.tune.schedulers import HyperBandScheduler

ray.init()
sched = HyperBandScheduler(
    time_attr="training_iteration", reward_attr="neg_mean_loss")
tune.run_experiments(
    {
        "exp": {
            "stop": {
                "mean_accuracy": 0.95,
                "training_iteration": 1 if args.smoke_test else 20,
            },
            "resources_per_trial": {"cpu": 3},
            "run": TrainMNIST,
            "num_samples": 1 if args.smoke_test else 20,
            "checkpoint_at_end": True,
            "config": {
                "args": args,
                "lr": tune.sample_from(
                    lambda spec: np.random.uniform(0.001, 0.1)),
                "momentum": tune.sample_from(
                    lambda spec: np.random.uniform(0.1, 0.9)),
            }
        }
    },
    verbose=0,
    scheduler=sched)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-iters", type=int, default=2000)
    args = parser.parse_args()

    ray.init()
    trials = tune.run_experiments({
        "test": {
            "env": "CartPole-v0",
            "run": "PG",
            "stop": {"training_iteration": args.num_iters},
            "config": {
                "callbacks": {
                    "on_episode_start": tune.function(on_episode_start),
                    "on_episode_step": tune.function(on_episode_step),
                    "on_episode_end": tune.function(on_episode_end),
                    "on_sample_end": tune.function(on_sample_end),
                    "on_train_result": tune.function(on_train_result),
                },
            },
        }
    })

    # verify custom metrics for integration tests
    custom_metrics = trials[0].last_result["custom_metrics"]
    print(custom_metrics)
    assert "mean_pole_angle" in custom_metrics
    assert type(custom_metrics["mean_pole_angle"]) is float
    assert "callback_ok" in trials[0].last_result
        '--data_dir',
        type=str,
        default='/tmp/tensorflow/mnist/input_data',
        help='Directory for storing input data')
    FLAGS, unparsed = parser.parse_known_args()
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

# !!! Example of using the ray.tune Python API !!!
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--smoke-test', action='store_true', help='Finish quickly for testing')
    args, _ = parser.parse_known_args()

    register_trainable('train_mnist', train)
    mnist_spec = {
        'run': 'train_mnist',
        'stop': {
            'mean_accuracy': 0.99,
            'time_total_s': 600,
        },
        'config': {
            'activation': grid_search(['relu', 'elu', 'tanh']),
        },
    }

    if args.smoke_test:
        mnist_spec['stop']['training_iteration'] = 2

    ray.init()
    run_experiments({'tune_mnist_test': mnist_spec})
# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Register as rllib env
register_env(env_name, create_env)

exp_tag = {
    "run": alg_run,
    "env": env_name,
    "config": {
        **config
    },
    "checkpoint_freq": 10,
    "max_failures": 999,
    "stop": {"training_iteration": 500},
    "num_samples": 1,
}

if upload_dir:
    exp_tag["upload_dir"] = "s3://" + upload_dir

trials = run_experiments({
    flow_params["exp_tag"]: exp_tag
})