def test_es():
    ray.init()
    config = {
        'policy': DefaultPolicy,
        'num_workers': 2,
        'episodes_per_batch': 1,
        'train_batch_size': 1,
        'noise_size': 100000000,
        'env_config': {
            'max_num_steps': 100,
        },
        'model': {
            'sparse': False,
            's_dim': 128,
        }
    }
    register_env('my_env', env_creator)
    trainer = ESTrainer(config, 'my_env')
    res = dict()
    for i in range(5):
        res = trainer.train()
    assert res['training_iteration'] == 5
    assert res['timesteps_total'] == 2000
    assert res['info']['update_ratio'] > 0
def register_all_environments():
    """Register all custom environments in Tune."""
    from ray.tune import register_env
    from raylab.envs.registry import ENVS

    for name, env_creator in ENVS.items():
        register_env(name, env_creator)
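# Usage sketch for the helper above (hedged): the env name "MockEnv-v0" and the
# SAC trainer choice are illustrative assumptions, not taken from the snippet.
def run_with_registered_envs():
    import ray
    from ray import tune

    register_all_environments()  # make every registry env resolvable by name
    ray.init()
    tune.run("SAC", stop={"timesteps_total": 1000},
             config={"env": "MockEnv-v0"})  # hypothetical registered env name
    ray.shutdown()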
def test_avail_actions_qmix(self):
    grouping = {
        "group_1": ["agent_1", "agent_2"],
    }
    obs_space = Tuple([
        AvailActionsTestEnv.observation_space,
        AvailActionsTestEnv.observation_space,
    ])
    act_space = Tuple([
        AvailActionsTestEnv.action_space,
        AvailActionsTestEnv.action_space,
    ])
    register_env(
        "action_mask_test",
        lambda config: AvailActionsTestEnv(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space),
    )
    config = (
        QMixConfig()
        .framework(framework="torch")
        .environment(
            env="action_mask_test",
            env_config={"avail_actions": [3, 4, 8]},
        )
        .rollouts(num_envs_per_worker=5)
    )
    # Test with vectorization on.
    trainer = config.build()
    for _ in range(4):
        trainer.train()  # OK if it doesn't trip the action assertion error
    assert trainer.train()["episode_reward_mean"] == 30.0
    trainer.stop()
    ray.shutdown()
def fine_tune(config, run, env: RailEnv):
    """Fine-tune the agent on a static env at evaluation time."""
    RailEnvPersister.save(env, CURRENT_ENV_PATH)
    num_agents = env.get_num_agents()
    tune_time = get_tune_time(num_agents)

    def env_creator(env_config):
        return FlatlandSparse(env_config,
                              fine_tune_env_path=CURRENT_ENV_PATH,
                              max_steps=num_agents * 100)

    register_env("flatland_sparse", env_creator)
    config['num_workers'] = 3
    config['num_envs_per_worker'] = 1
    config['lr'] = 0.00001 * num_agents

    exp_an = ray.tune.run(run["agent"],
                          reuse_actors=True,
                          verbose=1,
                          stop={"time_since_restore": tune_time},
                          checkpoint_freq=1,
                          keep_checkpoints_num=1,
                          checkpoint_score_attr="episode_reward_mean",
                          config=config,
                          restore=run["checkpoint_path"])

    trial: Trial = exp_an.trials[0]
    agent_config = trial.config
    agent_config['num_workers'] = 0
    agent = trial.get_trainable_cls()(env=config["env"], config=trial.config)
    checkpoint = exp_an.get_trial_checkpoints_paths(
        trial, metric="episode_reward_mean")
    agent.restore(checkpoint[0][0])
    return agent
def test_sac():
    ray.init()
    config = {
        "model": {
            "use_lstm": False,
        },
        "use_state_preprocessor": False,
        "learning_starts": 200,
        "normalize_actions": True,
        "rollout_fragment_length": 10,
        "timesteps_per_iteration": 100,
        "num_gpus": 1,
        "num_workers": 0,
        "evaluation_interval": 0,
        "monitor": False,
    }
    register_env("MinitaurEnv",
                 lambda env_config: MinitaurGymEnv(**env_config))
    trainer = SACTrainer(config, "MinitaurEnv")
    print(trainer.get_policy().model)
    for i in range(5):
        res = trainer.train()
        print(res)
    print(trainer._logdir)
    ray.shutdown()
def test_dynamics():
    ray.init()
    config = {
        'env': {
            'max_num_steps': 200,
        },
        'use_dynamics': True,
        'num_workers': 2,
        'plan_horizon': 10,
        'rollout_fragment_length': 50,
        "learning_starts": 200,
        "train_batch_size": 200,
        "train_every": 200,
        "num_sgd_iter": 3,
        "monitor": False
    }
    register_env('CheetahEnv', lambda env_config: CheetahEnv(**env_config))
    trainer = A2CTrainer(config, 'CheetahEnv')
    print(trainer.get_policy().num_params)
    for i in range(5):
        res = trainer.train()
        assert res['timesteps_this_iter'] == (
            config['rollout_fragment_length'] * config['num_workers'])
        logger.info(res['info'])
    print(trainer._logdir)
    ray.shutdown()
def create_expe_spec(config, n_cpu, n_gpu, exp_dir):

    def _trial_name_creator(trial):
        return "{}_{}_123".format(trial.trainable_name, trial.trial_id)

    # Create the env and register it, so Ray and RLlib can use it.
    register_env(config["env_config"]["env"],
                 lambda env_config: env_basic_creator(env_config))

    expe_config = merge_env_algo_config(config)

    # Shouldn't be useful now, automatic in RLlib:
    # trial_resources = {"cpu": expe_config["num_workers"] + 3, "gpu": expe_config["num_gpus"]}
    # expe_config["lr"] = grid_search([1e-3, 1e-4, 5e-4, 1e-5, 5e-5])
    # expe_config["target_network_update_freq"] = grid_search([20000, 40000])

    experiment = Experiment(
        name=config["name_expe"],
        run=config["algo"],
        stop=config["stop"],
        config=expe_config,
        num_samples=config.get("num_samples", 1),
        checkpoint_freq=10,
        max_failures=2,
        local_dir=exp_dir,
        # trial_name_creator=tune.function(_trial_name_creator)  # TODO: add when available
    )

    return experiment
def test_avail_actions_qmix(self):
    grouping = {
        "group_1": ["agent_1", "agent_2"],
    }
    obs_space = Tuple([
        AvailActionsTestEnv.observation_space,
        AvailActionsTestEnv.observation_space,
    ])
    act_space = Tuple([
        AvailActionsTestEnv.action_space,
        AvailActionsTestEnv.action_space,
    ])
    register_env(
        "action_mask_test",
        lambda config: AvailActionsTestEnv(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))
    trainer = QMixTrainer(
        env="action_mask_test",
        config={
            "num_envs_per_worker": 5,  # test with vectorization on
            "env_config": {
                "avail_actions": [3, 4, 8],
            },
            "framework": "torch",
        })
    for _ in range(4):
        trainer.train()  # OK if it doesn't trip the action assertion error
    assert trainer.train()["episode_reward_mean"] == 30.0
    trainer.stop()
    ray.shutdown()
def main():
    arg_params = parse_args()

    register_env("malmo", create_malmo)

    config = {
        'mission': arg_params.mission,
        'port': arg_params.port,
        'server': arg_params.server,
        'port2': arg_params.port2,
        'server2': arg_params.server2,
        'episodes': arg_params.episodes,
        'episode': arg_params.episode,
        'role': arg_params.role,
        'episodemaxsteps': arg_params.episodemaxsteps,
        'saveimagesteps': arg_params.saveimagesteps,
        'resync': arg_params.resync,
        'experimentUniqueId': arg_params.experimentUniqueId
    }

    env = create_malmo(config)

    ray.init(num_cpus=20)
    tune.run("IMPALA",
             stop={
                 "timesteps_total": 10000,
             },
             config={
                 "env_config": config,
                 "env": "malmo",
                 "num_workers": 1,
                 "num_gpus": 0
             })
    ray.shutdown()
def register_env(env_name, env_config=None, model_name=None):
    """Register `env_name` with Tune and return the model name to use for it."""
    if env_config is None:  # avoid a mutable default argument
        env_config = {}
    env = utils.create_env(env_name)
    # The creator lambda receives whatever config Tune passes at env-creation
    # time; give it its own name instead of shadowing `env_name`.
    tune.register_env(env_name,
                      lambda config: env(config, env_config=env_config))
    if model_name is None:
        model_name = env_name
    return model_name
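# Usage sketch for the helper above (hedged): "my_env" and the env_config keys
# are illustrative; whether the returned name maps to a registered custom model
# depends on code outside this snippet.
def build_trainer_for_registered_env():
    from ray.rllib.agents.ppo import PPOTrainer

    model_name = register_env("my_env", env_config={"max_steps": 100})
    print("model to use for this env:", model_name)
    return PPOTrainer(config={"num_workers": 0}, env="my_env")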
def test_counting_by_agent_steps(self):
    """Test whether counting by agent steps works for a multi-agent PPOTrainer."""
    config = copy.deepcopy(ppo.DEFAULT_CONFIG)

    action_space = Discrete(2)
    obs_space = Box(float("-inf"), float("inf"), (4,), dtype=np.float32)

    config["num_workers"] = 2
    config["num_sgd_iter"] = 2
    config["framework"] = "torch"
    config["rollout_fragment_length"] = 21
    config["train_batch_size"] = 147
    config["multiagent"] = {
        "policies": {
            "p0": (None, obs_space, action_space, {}),
            "p1": (None, obs_space, action_space, {}),
        },
        "policy_mapping_fn": lambda aid: "p{}".format(aid),
        "count_steps_by": "agent_steps",
    }

    tune.register_env("ma_cartpole",
                      lambda _: MultiAgentCartPole({"num_agents": 2}))

    num_iterations = 2
    trainer = ppo.PPOTrainer(config=config, env="ma_cartpole")
    results = None
    for i in range(num_iterations):
        results = trainer.train()
    self.assertGreater(results["agent_timesteps_total"],
                       num_iterations * config["train_batch_size"])
    self.assertLess(results["agent_timesteps_total"],
                    (num_iterations + 1) * config["train_batch_size"])
    trainer.stop()
def register_retro(game, state, **kwargs):
    """Register a given gym-retro game as a Ray environment.

    The environment is registered under the name 'retro-v0'.
    """
    env_creator = lambda env_config: make_env(game=game, state=state, **kwargs)
    register_env("retro-v0", env_creator)
    return partial(env_creator, {})
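# Usage sketch (hedged): the game/state names below are common gym-retro
# identifiers used purely for illustration; they are not part of the snippet.
def train_on_registered_retro_game():
    from ray import tune

    create_fn = register_retro("Airstriker-Genesis", state="Level1")
    env = create_fn()  # build one local instance, e.g. to inspect the spaces
    print(env.observation_space, env.action_space)
    tune.run("PPO", stop={"timesteps_total": 10000},
             config={"env": "retro-v0", "num_workers": 1})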
def main():
    args = parser.parse_args()
    config = generate_config(args)
    # env = CityFlowEnvRay(config)
    # eng = cityflow.Engine(config["cityflow_config_file"], thread_num=config["thread_num"])
    # config["eng"] = [eng, ]
    # print(config["eng"])

    num_agents = len(config["intersection_id"])
    grouping = {"group_1": [id_ for id_ in config["intersection_id"]]}
    obs_space = Tuple(
        [CityFlowEnvRay.observation_space for _ in range(num_agents)])
    act_space = Tuple(
        [CityFlowEnvRay.action_space for _ in range(num_agents)])
    register_env(
        "cityflow_multi",
        lambda config_: CityFlowEnvRay(config_).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))

    if args.algo == "QMIX":
        config_ = {
            # "num_workers": 2,
            "num_gpus_per_worker": 0,
            "sample_batch_size": 4,
            "num_cpus_per_worker": 3,
            "train_batch_size": 32,
            "exploration_final_eps": 0.0,
            "num_workers": 8,
            "mixer": grid_search(["qmix"]),
            "env_config": config
        }
        group = True
    elif args.algo == "APEX_QMIX":
        config_ = {
            "num_gpus": 1,
            "num_workers": 2,
            "optimizer": {
                "num_replay_buffer_shards": 1,
            },
            "min_iter_time_s": 3,
            "buffer_size": 2000,
            "learning_starts": 300,
            "train_batch_size": 64,
            "sample_batch_size": 32,
            "target_network_update_freq": 100,
            "timesteps_per_iteration": 1000,
            "env_config": config
        }
        group = True
    else:
        config_ = {}
        group = False

    ray.init()
    tune.run(
        args.algo,
        stop={"timesteps_total": args.epoch * args.num_step},
        checkpoint_freq=args.save_freq,
        config=dict(config_, **{"env": "cityflow_multi"}),
    )
def test_job(model: Model, checkpoint: Path, outputs_dir: Path) -> None:
    logger.info(
        "Initializing ray with 2 cpus and %d GPUs",
        model.executor.gpus,
    )
    ray.init(
        num_cpus=2,
        num_gpus=model.executor.gpus,
        include_dashboard=False,
    )

    tune.register_env(model.environment.rllib_id,
                      lambda _: model.environment.make_env())

    agent = model.agent.make_agent(model.environment)
    logger.info(
        "Restoring %s agent with %s trainable params from %s",
        model.agent.type,
        f"{model.agent.trainable_parameters_count(agent):,}",
        checkpoint,
    )
    agent.restore(str(checkpoint))

    # Run inference on all of the test benchmarks.
    results: List[InferenceResult] = []
    with model.environment.make_env() as env:
        test_benchmarks = list(model.testing.benchmark_uris_iterator(env))
        for i, benchmark in enumerate(test_benchmarks, start=1):
            env.reset(benchmark=benchmark)
            result = InferenceResult.from_agent(
                env, agent,
                runtime=model.environment.reward_space == "Runtime")
            logger.info(
                "Test %s of %s: %s",
                f"{i:,d}",
                f"{len(test_benchmarks):,d}",
                result,
            )
            results.append(result)

    # Do this once the actual work has been done so that failed jobs
    # don't leave meta files lying around.
    with open(outputs_dir / "test-results.json", "w") as f:
        json.dump([r.dict() for r in results], f)

    with open(outputs_dir / "test-meta.json", "w") as f:
        json.dump(
            {
                "timestamp": datetime.now().isoformat(),
                "checkpoint": checkpoint.name,
            },
            f,
        )

    # Explicit call to ray shutdown here so that multiple consecutive
    # jobs can initialize ray with different resource requirements.
    ray.shutdown()
def test_pg_compilation(self):
    """Test whether a PGTrainer can be built with all frameworks."""
    config = pg.DEFAULT_CONFIG.copy()
    config["num_workers"] = 1
    config["rollout_fragment_length"] = 500
    # Test with filter to see whether they work w/o preprocessing.
    config["observation_filter"] = "MeanStdFilter"
    num_iterations = 1

    image_space = Box(-1.0, 1.0, shape=(84, 84, 3))
    simple_space = Box(-1.0, 1.0, shape=(3,))

    tune.register_env(
        "random_dict_env",
        lambda _: RandomEnv({
            "observation_space": Dict({
                "a": simple_space,
                "b": Discrete(2),
                "c": image_space,
            }),
            "action_space": Box(-1.0, 1.0, shape=(1,)),
        }),
    )
    tune.register_env(
        "random_tuple_env",
        lambda _: RandomEnv({
            "observation_space": Tuple(
                [simple_space, Discrete(2), image_space]),
            "action_space": Box(-1.0, 1.0, shape=(1,)),
        }),
    )

    for _ in framework_iterator(config, with_eager_tracing=True):
        # Test for different env types (discrete w/ and w/o image, + cont).
        for env in [
                "random_dict_env",
                "random_tuple_env",
                "MsPacmanNoFrameskip-v4",
                "CartPole-v0",
                "FrozenLake-v1",
        ]:
            print(f"env={env}")
            trainer = pg.PGTrainer(config=config, env=env)
            for i in range(num_iterations):
                results = trainer.train()
                check_train_results(results)
                print(results)

            check_compute_single_action(
                trainer, include_prev_action_reward=True)
def register(env_config):
    ModelCatalog.register_custom_model("1st_model", TorchRNNModel)
    ModelCatalog.register_custom_model("2nd_model", SecondModel)
    ModelCatalog.register_custom_model("3rd_model", ThirdModel)
    ModelCatalog.register_custom_model("4th_model", FourthModel)
    ModelCatalog.register_custom_model("5th_model", FifthModel)
    ModelCatalog.register_custom_model("6th_model", SixthModel)
    ModelCatalog.register_custom_model("7th_model", SeventhModel)
    tune.register_env("MinerEnv-v0", lambda x: v0.RllibMinerEnv(env_config))
def __init__(self, trainer: Trainer.__class__, weights: str):
    if not ray.is_initialized():
        ray.init()
    self.obs_state_processor = SimpleObsStateProcessor(
        infected_population_sorting_per_city)
    self.act_state_processor = SimpleActStateProcessor(
        sort_pathogens=self.obs_state_processor.sort_pathogens)
    register_env(
        "ic20env",
        lambda _: InferenceIC20Environment(
            self.obs_state_processor, self.act_state_processor))
    self.trainer = self._load_trainer(trainer(env="ic20env"), weights)
def main():
    register_env('MinitaurEnv',
                 lambda env_config: MinitaurGymEnv(**env_config))
    if args.eval:
        config = {
            'env_config': {
                'render': True,
            },
            'num_workers': 0,
            'seed': 123,
            'skill_input': random.randint(0, 9),
        }
        trainer = PPOTrainer(config, 'MinitaurEnv')
        state = pickle.load(open(args.restore_checkpoint, "rb"))
        states = pickle.loads(state['worker'])['state']
        trainer.set_weights(states)
        while True:
            trainer.evaluate()
            time.sleep(0.01)
    else:
        log_interval = args.log_interval
        num_episodes = args.num_episodes
        ray.init()
        config = {
            'num_workers': args.num_workers,
            'policy': args.policy,
            'rollout_fragment_length': 200,
            'sgd_minibatch_size': 256,
            'num_sgd_iter': 20,
            'train_batch_size': 3200,
            'use_env_rewards': False,
        }
        trainer = PPOTrainer(config=config, env='MinitaurEnv')
        if args.restore_checkpoint:
            logger.info('Resuming from checkpoint path: {}'.format(
                args.restore_checkpoint))
            trainer.restore(args.restore_checkpoint)
        for epi_counter in range(num_episodes):
            res = trainer.train()
            logger.info(res['info'])
            if (epi_counter + 1) % log_interval == 0:
                ckp = trainer.save()
                logger.info('model saved to: {}'.format(ckp))
def load_agent():
    # Initialize training environment
    ray.init()

    def environment_creater(params=None):
        agent = SimpleAvoidAgent(noise=0.05)
        return TronRaySinglePlayerEnvironment(board_size=13, num_players=4,
                                              agent=agent)

    env = environment_creater()
    tune.register_env("tron_single_player", environment_creater)
    ModelCatalog.register_custom_preprocessor("tron_prep", TronExtractBoard)

    # Configure Deep Q-Learning with reasonable values
    config = DEFAULT_CONFIG.copy()
    config['num_workers'] = 4
    # config['num_gpus'] = 1
    # config["timesteps_per_iteration"] = 1024
    # config['target_network_update_freq'] = 256
    # config['buffer_size'] = 100_000
    # config['schedule_max_timesteps'] = 200_000
    # config['exploration_fraction'] = 0.02
    # config['compress_observations'] = False
    # config['n_step'] = 2
    # config['seed'] = SEED

    # Configure for PPO
    # config["sample_batch_size"] = 100
    # config["train_batch_size"] = 200
    # config["sgd_minibatch_size"] = 60

    # Configure A3C with reasonable values

    # We will use a simple convolution network with 3 layers as our feature extractor
    config['model']['vf_share_layers'] = True
    config['model']['conv_filters'] = [(512, 5, 1), (256, 3, 2), (128, 3, 2)]
    config['model']['fcnet_hiddens'] = [256]
    config['model']['custom_preprocessor'] = 'tron_prep'

    # Begin training or evaluation
    # trainer = DDPGTrainer(config, "tron_single_player")
    # trainer = A3CTrainer(config, "tron_single_player")
    trainer = DQNTrainer(config, "tron_single_player")
    # trainer = PPOTrainer(config, "tron_single_player")

    trainer.restore("./dqn_checkpoint_3800/checkpoint-3800")

    return trainer  # .get_policy("trainer")
def setup_grouping(config: dict):
    grouping = {
        "group_1": list(range(config["env_config"]["max_n_agents"])),
    }

    obs_space = Tuple([
        make_obs(config["env_config"]["observation"],
                 config["env_config"]["observation_config"]).observation_space()
        for _ in range(config["env_config"]["max_n_agents"])
    ])
    act_space = Tuple([
        GlobalFlatlandGymEnv.action_space
        for _ in range(config["env_config"]["max_n_agents"])
    ])

    register_env(
        "flatland_sparse_grouped",
        lambda config: FlatlandSparse(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))
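# Usage sketch (hedged): the config keys mirror the ones read by setup_grouping();
# the QMIX trainer choice and the stop criterion are assumptions for illustration.
def run_grouped_flatland(config: dict):
    import ray
    from ray import tune

    setup_grouping(config)  # registers "flatland_sparse_grouped"
    ray.init()
    tune.run("QMIX", stop={"timesteps_total": 100000},
             config={**config, "env": "flatland_sparse_grouped"})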
def check_support_multiagent(alg, config):
    register_env("multi_agent_mountaincar",
                 lambda _: MultiAgentMountainCar({"num_agents": 2}))
    register_env("multi_agent_cartpole",
                 lambda _: MultiAgentCartPole({"num_agents": 2}))
    config["log_level"] = "ERROR"
    for _ in framework_iterator(config, frameworks=("torch", "tf")):
        if alg in ["DDPG", "APEX_DDPG", "SAC"]:
            a = get_agent_class(alg)(
                config=config, env="multi_agent_mountaincar")
        else:
            a = get_agent_class(alg)(
                config=config, env="multi_agent_cartpole")
        print(a.train())
        a.stop()
def test_sac_dict_obs_order(self):
    dict_space = Dict({
        "img": Box(low=0, high=1, shape=(42, 42, 3)),
        "cont": Box(low=0, high=100, shape=(3,)),
    })

    # Dict space .sample() returns an ordered dict.
    # Make sure the keys in samples are ordered differently.
    dict_samples = [{
        k: v for k, v in reversed(dict_space.sample().items())
    } for _ in range(10)]

    class NestedDictEnv(Env):
        def __init__(self):
            self.action_space = Box(low=-1.0, high=1.0, shape=(2,))
            self.observation_space = dict_space
            self._spec = EnvSpec("NestedDictEnv-v0")
            self.steps = 0

        def reset(self):
            self.steps = 0
            return dict_samples[0]

        def step(self, action):
            self.steps += 1
            return dict_samples[self.steps], 1, self.steps >= 5, {}

    tune.register_env("nested", lambda _: NestedDictEnv())

    config = (
        sac.SACConfig()
        .training(
            replay_buffer_config={"learning_starts": 0, "capacity": 10},
            train_batch_size=5,
        )
        .rollouts(
            num_rollout_workers=0,
            rollout_fragment_length=5,
        )
        .experimental(_disable_preprocessor_api=True)
    )
    num_iterations = 1

    for _ in framework_iterator(config, with_eager_tracing=True):
        trainer = config.build(env="nested")
        for _ in range(num_iterations):
            results = trainer.train()
            check_train_results(results)
            print(results)
        check_compute_single_action(trainer)
def test_dreamer_compilation(self):
    """Test whether a DreamerTrainer can be built with all frameworks."""
    config = dreamer.DEFAULT_CONFIG.copy()
    tune.register_env("dm_control_hopper_hop", lambda _: hopper_hop())
    num_iterations = 1

    # Test against all frameworks.
    for _ in framework_iterator(config, frameworks="torch"):
        for env in ["dm_control_hopper_hop"]:
            trainer = dreamer.DREAMERTrainer(config=config, env=env)
            for i in range(num_iterations):
                results = trainer.train()
                print(results)
            check_compute_single_action(trainer)
            trainer.stop()
def main():
    env_args = {
        "forest_data_path": "/Users/anmartin/Projects/summer_project/hl_planner/forest_data.tiff",
        "simulation_data_path": "/Users/anmartin/Projects/FormationSimulation/fastsimulation.json",
        "num_measurements": 6,
        "max_forest_heights": [60, 90, 45, 38, 30, 76],
        "orbit_altitude": 757000,
        "draw_plot": True
    }
    parser = rollout.create_parser()
    args = parser.parse_args()

    register_env("offline-orekit", lambda _: OfflineOrekitEnv(env_args))
    rollout.run(args, parser)
def test_sac_dict_obs_order(self):
    dict_space = Dict({
        "img": Box(low=0, high=1, shape=(42, 42, 3)),
        "cont": Box(low=0, high=100, shape=(3,)),
    })

    # Dict space .sample() returns an ordered dict.
    # Make sure the keys in samples are ordered differently.
    dict_samples = [{
        k: v for k, v in reversed(dict_space.sample().items())
    } for _ in range(10)]

    class NestedDictEnv(Env):
        def __init__(self):
            self.action_space = Box(low=-1.0, high=1.0, shape=(2,))
            self.observation_space = dict_space
            self._spec = EnvSpec("NestedDictEnv-v0")
            self.steps = 0

        def reset(self):
            self.steps = 0
            return dict_samples[0]

        def step(self, action):
            self.steps += 1
            return dict_samples[self.steps], 1, self.steps >= 5, {}

    tune.register_env("nested", lambda _: NestedDictEnv())

    config = sac.DEFAULT_CONFIG.copy()
    config["num_workers"] = 0  # Run locally.
    config["learning_starts"] = 0
    config["rollout_fragment_length"] = 5
    config["train_batch_size"] = 5
    config["replay_buffer_config"]["capacity"] = 10
    # Disable preprocessors.
    config["_disable_preprocessor_api"] = True
    num_iterations = 1

    for _ in framework_iterator(config, with_eager_tracing=True):
        trainer = sac.SACTrainer(env="nested", config=config)
        for _ in range(num_iterations):
            results = trainer.train()
            check_train_results(results)
            print(results)
        check_compute_single_action(trainer)
def main():
    register_env("MinitaurEnv",
                 lambda env_config: MinitaurGymEnv(**env_config))
    if args.eval:
        config = {
            "env_config": {
                "render": True,
            },
            "num_workers": 0,
            "seed": 123,
        }
        trainer = A2CTrainer(config, "MinitaurEnv")
        state = pickle.load(open(args.restore_checkpoint, "rb"))
        states = pickle.loads(state["worker"])["state"]
        trainer.set_weights(states)
        while True:
            trainer.workers.local_worker().sample()
            time.sleep(0.01)
    else:
        ray.init()
        config = {
            "num_workers": args.num_workers,
            "rollout_fragment_length": 50,
            "train_batch_size": 2500,
            "num_sgd_iter": 80
        }
        trainer = A2CTrainer(config, "MinitaurEnv")
        if args.restore_checkpoint:
            logger.info("Resuming from checkpoint path: {}".format(
                args.restore_checkpoint))
            trainer.restore(args.restore_checkpoint)
        for epi_counter in range(args.num_episodes):
            res = trainer.train()
            logger.info(res["info"])
            if (epi_counter + 1) % args.log_interval == 0:
                ckp = trainer.save()
                logger.info("model saved to: {}".format(ckp))
        ray.shutdown()
def register_pettingzoo_env(env_name):
    """Register the env, including its preprocessing pipeline, so that it can
    be referenced by name through Ray."""

    def get_env(config):
        name = env_name.replace('-', '_')
        env = __import__(f'pettingzoo.atari.{name}', fromlist=[None])
        env = env.parallel_env(obs_type='grayscale_image')
        env = frame_skip_v0(env, 4)
        env = resize_v0(env, 84, 84)
        env = frame_stack_v1(env, 4)
        env = agent_indicator_v0(env)
        return ParallelPettingZooEnv(
            env,
            random_action=config['random_action'],
            random_proba=config['random_action_probability'])

    print(f'Registering env with name {env_name}')
    register_env(env_name, lambda config: get_env(config))
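# Usage sketch (hedged): the env name and trainer are illustrative; note that
# get_env() above reads "random_action" and "random_action_probability" from
# the env_config, so both keys must be supplied.
def train_registered_pettingzoo_env():
    from ray import tune

    register_pettingzoo_env("pong_v2")
    tune.run("APEX", stop={"timesteps_total": 100000},
             config={
                 "env": "pong_v2",
                 "env_config": {
                     "random_action": False,
                     "random_action_probability": 0.0,
                 },
             })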
def test_model_free():
    ray.init()
    config = {
        'use_dynamics': False,
        'num_workers': 2,
        'train_batch_size': 200,
        "train_every": 200,
    }
    register_env('MinitaurEnv',
                 lambda env_config: MinitaurGymEnv(**env_config))
    trainer = A2CTrainer(config, 'MinitaurEnv')
    for i in range(5):
        res = trainer.train()
        logger.info(res)
    ray.shutdown()
def check_support_multiagent(alg, config):
    register_env("multi_agent_mountaincar",
                 lambda _: MultiAgentMountainCar({"num_agents": 2}))
    register_env("multi_agent_cartpole",
                 lambda _: MultiAgentCartPole({"num_agents": 2}))
    config["log_level"] = "ERROR"
    for fw in framework_iterator(config):
        if fw in ["tf2", "tfe"] and \
                alg in ["A3C", "APEX", "APEX_DDPG", "IMPALA"]:
            continue
        if alg in ["DDPG", "APEX_DDPG", "SAC"]:
            a = get_trainer_class(alg)(
                config=config, env="multi_agent_mountaincar")
        else:
            a = get_trainer_class(alg)(
                config=config, env="multi_agent_cartpole")
        print(a.train())
        a.stop()
def register_doom_envs_rllib(**kwargs):
    """Register env factories in the RLlib system."""
    for spec in DOOM_ENVS:

        # Bind the current spec as a default argument; otherwise every factory
        # would capture the last loop value when it is finally called.
        def make_env_func(env_config, spec=spec):
            print('Creating env!!!')
            cfg = default_cfg(env=spec.name)
            cfg.pixel_format = 'HWC'  # tensorflow models expect HWC by default

            if 'skip_frames' in env_config:
                cfg.env_frameskip = env_config['skip_frames']
            if 'res_w' in env_config:
                cfg.res_w = env_config['res_w']
            if 'res_h' in env_config:
                cfg.res_h = env_config['res_h']
            if 'wide_aspect_ratio' in env_config:
                cfg.wide_aspect_ratio = env_config['wide_aspect_ratio']

            env = make_doom_env(spec.name, env_config=env_config, cfg=cfg, **kwargs)

            # We lock the global mutex here, otherwise Doom instances may crash
            # on first reset when too many of them are reset simultaneously.
            lock = FileLock(DOOM_LOCK_PATH)
            attempt = 0
            while True:
                attempt += 1
                try:
                    with lock.acquire(timeout=10):
                        print('Env created, resetting...')
                        env.reset()
                        print('Env reset completed! Config:', env_config)
                        break
                except Timeout:
                    print('Another instance of this application currently '
                          'holds the lock, attempt:', attempt)

            return env

        register_env(spec.name, make_env_func)
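# Usage sketch (hedged): the spec name "doom_battle" and the env_config keys
# below are illustrative; only keys handled inside make_env_func() take effect.
def train_registered_doom_env():
    import ray
    from ray import tune

    register_doom_envs_rllib()
    ray.init()
    tune.run("IMPALA", stop={"timesteps_total": 1000000},
             config={
                 "env": "doom_battle",  # hypothetical spec.name
                 "env_config": {"skip_frames": 4, "res_w": 128, "res_h": 72},
             })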
env_name = "carla_env"
env_config = ENV_CONFIG.copy()
env_config.update({
    "verbose": False,
    "x_res": 80,
    "y_res": 80,
    "use_depth_camera": False,
    "discrete_actions": False,
    "server_map": "/Game/Maps/Town02",
    "reward_function": "lane_keep",
    "enable_planner": False,
    "scenarios": [LANE_KEEP],
})

register_env(env_name, lambda env_config: CarlaEnv(env_config))
register_carla_model()

ray.init()
run_experiments({
    "carla-a3c": {
        "run": "A3C",
        "env": "carla_env",
        "resources": {"cpu": 4, "gpu": 1},
        "config": {
            "env_config": env_config,
            "model": {
                "custom_model": "carla",
                "custom_options": {
                    "image_shape": [80, 80, 6],
                },
        self.poletrans.set_rotation(-x[2])
        return self.viewer.render(return_rgb_array=mode == 'rgb_array')

    def close(self):
        if self.viewer:
            self.viewer.close()


if __name__ == "__main__":
    import ray
    from ray import tune

    args = parser.parse_args()
    tune.register_env("cartpole_stateless", lambda _: CartPoleStatelessEnv())
    ray.init()

    configs = {
        "PPO": {
            "num_sgd_iter": 5,
            "vf_share_layers": True,
            "vf_loss_coeff": 0.0001,
        },
        "IMPALA": {
            "num_workers": 2,
            "num_gpus": 0,
            "vf_loss_coeff": 0.01,
        },
    }
if __name__ == "__main__":
    args = parser.parse_args()

    grouping = {
        "group_1": ["agent_1", "agent_2"],
    }
    obs_space = Tuple([
        TwoStepGame.observation_space,
        TwoStepGame.observation_space,
    ])
    act_space = Tuple([
        TwoStepGame.action_space,
        TwoStepGame.action_space,
    ])
    register_env(
        "grouped_twostep",
        lambda config: TwoStepGame(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))

    if args.run == "QMIX":
        config = {
            "sample_batch_size": 4,
            "train_batch_size": 32,
            "exploration_final_eps": 0.0,
            "num_workers": 0,
            "mixer": grid_search([None, "qmix", "vdn"]),
        }
        group = True
    elif args.run == "APEX_QMIX":
        config = {
            "num_gpus": 0,
            "num_workers": 2,