def __init__(self, env_config, **kwargs):
    super(FlatlandMeta, self).__init__(env_config, **kwargs)
    assert env_config['generator'] == 'sparse_rail_generator'
    self._env_config = env_config

    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._config = get_generator_config(env_config['generator_config'])

    if env_config.get('number_of_agents', None) is not None:
        self._config['number_of_agents'] = env_config['number_of_agents']

    # Overwrite the generator seed with the env_config seed if one is given.
    if env_config.get('seed'):
        self._config['seed'] = env_config.get('seed')

    # Only the first worker prints the resolved generator config.
    if not hasattr(env_config, 'worker_index') or (
            env_config.worker_index == 0 and env_config.vector_index == 0):
        print("=" * 50)
        print(self._config)
        print("=" * 50)

    self._env = RobustFlatlandGymEnv(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        render=env_config.get('render'),
        regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'],
        config=env_config,
        allow_noop=True)
    self.last_obs = None
def setup_policy_map(self, config: dict):
    obs_space = make_obs(
        config["env_config"]["observation"],
        config["env_config"]["observation_config"]).observation_space()
    config["multiagent"] = {
        "policies": {
            "pol_" + str(i): (None, obs_space,
                              FillingFlatlandGymEnv.action_space,
                              {"agent_id": i})
            for i in range(config["env_config"]["observation_config"]["max_n_agents"])
        },
        "policy_mapping_fn": lambda agent_id: "pol_" + str(agent_id),
    }
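# Quick sanity check of the mapping defined above: each integer agent id
# resolves to its own "pol_<id>" policy. The agent count of 5 is an
# illustrative assumption.
mapping_fn = lambda agent_id: "pol_" + str(agent_id)
assert [mapping_fn(i) for i in range(5)] == \
       ["pol_0", "pol_1", "pol_2", "pol_3", "pol_4"]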
def __init__(self, config) -> None:
    super().__init__(config)
    # Build one observation per config key; the combined builder wraps all
    # of their underlying rail-env builders.
    self._observations = [
        make_obs(obs_name, config[obs_name]) for obs_name in config.keys()
    ]
    self._builder = CombinedObsForRailEnv(
        [o._builder for o in self._observations])
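# Usage, grounded in the get_env helpers below: a combined observation is
# configured with a dict whose keys name the registered sub-observations.
obs = make_obs("combined", {"path": None, "simple_meta": None})
builder = obs.builder()  # presumably the CombinedObsForRailEnv built above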
def evaluate(config, run):
    start_time = time()
    obs_builder = make_obs(
        config["env_config"]['observation'],
        config["env_config"].get('observation_config')).builder()
    evaluation_number = 0
    total_reward = 0
    all_rewards = []
    sp_agent = ShortestPathAgent()

    while True:
        try:
            # env_create returns a falsy observation once all evaluation
            # environments have been consumed.
            observation, info = remote_client.env_create(
                obs_builder_object=obs_builder)
            if not observation:
                break

            steps = 0
            evaluation_number += 1
            episode_start_info(evaluation_number, remote_client=remote_client)
            done = defaultdict(lambda: False)

            while True:
                try:
                    while not done['__all__']:
                        rail_actions = sp_agent.compute_actions(
                            observation, remote_client.env)
                        observation, all_rewards, done, info = remote_client.env_step(
                            rail_actions)
                        steps += 1
                        print('.', end='', flush=True)
                        # Hand over to the module-level skip() helper once the
                        # overall time budget is exhausted.
                        if (time() - start_time) > TIME_LIMIT:
                            skip(done)
                            break
                    if done['__all__']:
                        total_reward = episode_end_info(
                            all_rewards, total_reward, evaluation_number,
                            steps, remote_client=remote_client)
                        break
                except TimeoutException as err:
                    print("Timeout! Will skip this episode and go to the next.", err)
                    break
        except TimeoutException as err:
            print("Timeout during planning time. Will skip to next evaluation!", err)

    print("Evaluation of all environments complete...")
    print(remote_client.submit())
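# evaluate() above relies on module-level state: remote_client, TIME_LIMIT,
# ShortestPathAgent and the episode_start_info/episode_end_info/skip helpers
# live in the surrounding submission script. A minimal sketch of that setup,
# assuming the standard flatland-rl evaluator client exports
# FlatlandRemoteClient and TimeoutException; the TIME_LIMIT value is an
# illustrative assumption, not fixed by this repo.
from time import time
from collections import defaultdict
from flatland.evaluators.client import FlatlandRemoteClient, TimeoutException

remote_client = FlatlandRemoteClient()  # connects to the AIcrowd evaluator service
TIME_LIMIT = 8 * 60 * 60                # e.g. an 8-hour overall budget, in seconds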
def __init__(self, env_config, fine_tune_env_path=None, max_steps=None, **kwargs) -> None:
    super().__init__(env_config.get("actions_are_logits", False), max_steps=max_steps)
    assert env_config['generator'] == 'sparse_rail_generator'
    self._env_config = env_config
    self._fine_tune_env_path = fine_tune_env_path

    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._config = get_generator_config(env_config['generator_config'])

    if env_config.get('number_of_agents', None) is not None:
        self._config['number_of_agents'] = env_config['number_of_agents']

    # Overwrite the generator seed with the env_config seed if one is given.
    if env_config.get('seed'):
        self._config['seed'] = env_config.get('seed')

    # Only the first worker prints the resolved generator config.
    if not hasattr(env_config, 'worker_index') or (
            env_config.worker_index == 0 and env_config.vector_index == 0):
        print("=" * 50)
        pprint(self._config)
        print("=" * 50)

    self._gym_env_class = self._gym_envs[env_config.get("gym_env", "default")]
    self._env = self._gym_env_class(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        render=env_config.get('render'),
        regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'],
        config=env_config,
        allow_noop=env_config.get('allow_noop', True))

    # Optional wrappers, applied in order around the base gym env.
    if env_config['observation'] in self._sp_action_needed:
        self._env = ShortestPathActionWrapper(self._env)
    if env_config['observation'] in ('path', 'nr_conflicts_path'):
        self._env = NoStopShortestPathActionWrapper(self._env)
    if env_config.get('priorization', False):
        self._env = PriorizationWrapper(self._env)
    if env_config.get('sparse_priorization', False):
        self._env = SparsePriorizationWrapper(self._env)
    if env_config.get('sparse_reward', False):
        self._env = SparseRewardWrapper(
            self._env,
            finished_reward=env_config.get('done_reward', 1),
            not_finished_reward=env_config.get('not_finished_reward', -1))
    if env_config.get('global_reward', False):
        self._env = GlobalRewardWrapper(self._env)
    if env_config.get('deadlock_reward', 0) != 0:
        self._env = DeadlockWrapper(self._env,
                                    deadlock_reward=env_config['deadlock_reward'])
    if env_config.get('resolve_deadlocks', False):
        deadlock_reward = env_config.get('deadlock_reward', 0)
        self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
    if env_config.get('skip_no_choice_cells', False):
        self._env = SkipNoChoiceCellsWrapper(
            self._env,
            env_config.get('accumulate_skipped_rewards', False),
            discounting=env_config.get('discounting', 1.))
    if env_config.get('available_actions_obs', False):
        self._env = AvailableActionsWrapper(self._env,
                                            env_config.get('allow_noop', True))
    # NOTE: this flag currently applies the same AvailableActionsWrapper as
    # 'available_actions_obs' above; if both flags are set, the env is
    # wrapped twice.
    if env_config.get('fill_unavailable_actions', False):
        self._env = AvailableActionsWrapper(self._env,
                                            env_config.get('allow_noop', True))
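# Illustrative env_config for the constructor above: every key shown is read
# in __init__ or by one of the wrappers; the concrete values, including the
# generator_config preset name, are assumptions for the sketch.
example_env_config = {
    'generator': 'sparse_rail_generator',
    'generator_config': 'small_v0',   # hypothetical generator preset name
    'observation': 'tree',
    'observation_config': {'max_depth': 2, 'shortest_path_max_depth': 30},
    'gym_env': 'default',
    'allow_noop': True,
    'sparse_reward': True,
    'done_reward': 1,
    'not_finished_reward': -1,
    'deadlock_reward': -5,
    'skip_no_choice_cells': True,
    'accumulate_skipped_rewards': False,
    'discounting': 1.,
}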
def setup_hierarchical_policies(self, config: dict):
    obs_space: gym.spaces.Tuple = make_obs(
        config["env_config"]["observation"],
        config["env_config"]["observation_config"]).observation_space()
    # Two-level setup: the meta policy emits a scalar in [0, 1], while the
    # agent policy picks the actual rail action.
    config["multiagent"] = {
        "policies": {
            "meta": (None, obs_space.spaces[0],
                     gym.spaces.Box(high=1, low=0, shape=(1,)), {}),
            "agent": (None, obs_space.spaces[1],
                      FillingFlatlandGymEnv.action_space, {}),
        },
        "policy_mapping_fn": lambda agent_id: "meta" if 'meta' in str(agent_id) else "agent",
    }
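# Sanity check for the hierarchical mapping above: ids containing "meta" are
# routed to the meta policy, all other ids (e.g. plain integer handles) to
# the agent policy.
mapping_fn = lambda agent_id: "meta" if 'meta' in str(agent_id) else "agent"
assert mapping_fn("meta_0") == "meta"
assert mapping_fn(3) == "agent"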
def __init__(self, config) -> None:
    super().__init__(config)
    # 'interval' is consumed here and forwarded to the builder; the
    # remaining config keys all name sub-observations.
    self.interval = 10
    if config.get('interval', None) is not None:
        self.interval = config['interval']
        del config['interval']
    self._observations = [
        make_obs(obs_name, config[obs_name]) for obs_name in config.keys()
    ]
    self._builder = CombinedObsForRailEnv(
        [o._builder for o in self._observations], interval=self.interval)
def setup_grouping(config: dict):
    # Fold all agents into a single group so trainers that expect grouped
    # agents (e.g. QMIX) see one tuple observation and one tuple action.
    grouping = {
        "group_1": list(range(config["env_config"]["max_n_agents"])),
    }
    obs_space = Tuple([
        make_obs(config["env_config"]["observation"],
                 config["env_config"]["observation_config"]).observation_space()
        for _ in range(config["env_config"]["max_n_agents"])
    ])
    act_space = Tuple([
        GlobalFlatlandGymEnv.action_space
        for _ in range(config["env_config"]["max_n_agents"])
    ])
    register_env(
        "flatland_sparse_grouped",
        lambda config: FlatlandSparse(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))
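# Illustrative call: every key read by setup_grouping is shown; the
# observation name and its sub-config mirror the "tree" setup used in the
# QMIX script below, but the concrete values are assumptions.
example_config = {
    "env_config": {
        "max_n_agents": 5,
        "observation": "tree",
        "observation_config": {"max_depth": 2, "shortest_path_max_depth": 30},
    }
}
setup_grouping(example_config)  # registers "flatland_sparse_grouped" with Ray Tune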
def __init__(self, env_config):
    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._config = get_generator_config(env_config['generator_config'])
    self._global_obs = env_config.get('observation_config',
                                      {}).get('global_obs', False)

    # Overwrite the generator seed with the env_config seed if one is given.
    if env_config.get('seed'):
        self._config['seed'] = env_config.get('seed')

    if env_config.get('gym_env', None) is not None:
        self._gym_env_class = self._gym_envs[env_config["gym_env"]]
    else:
        # Fall back to the single-agent gym env when using global observations.
        self._gym_env_class = (self._gym_envs["default"]
                               if not self._global_obs
                               else self._gym_envs["single"])

    self._env = self._gym_env_class(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'],
        config=env_config)

    # Optional wrappers, applied in order around the base gym env.
    if env_config['observation'] == 'shortest_path':
        self._env = ShortestPathActionWrapper(self._env)
    if env_config.get('sparse_reward', False):
        self._env = SparseRewardWrapper(
            self._env,
            finished_reward=env_config.get('done_reward', 1),
            not_finished_reward=env_config.get('not_finished_reward', -1))
    if env_config.get('deadlock_reward', 0) != 0:
        self._env = DeadlockWrapper(
            self._env, deadlock_reward=env_config['deadlock_reward'])
    if env_config.get('resolve_deadlocks', False):
        deadlock_reward = env_config.get('deadlock_reward', 0)
        self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
    if env_config.get('skip_no_choice_cells', False):
        self._env = SkipNoChoiceCellsWrapper(
            self._env, env_config.get('accumulate_skipped_rewards', False))
    if env_config.get('available_actions_obs', False):
        self._env = AvailableActionsWrapper(self._env, allow_noop=False)
def __init__(self, env_config) -> None:
    super().__init__()
    self._env_config = env_config

    # Separate seed ranges for training and testing keep evaluation
    # episodes deterministic and disjoint from the training episodes.
    self._test = env_config.get('test', False)
    self._min_seed = env_config['min_seed']
    self._max_seed = env_config['max_seed']
    assert self._min_seed <= self._max_seed
    self._min_test_seed = env_config.get('min_test_seed', 0)
    self._max_test_seed = env_config.get('max_test_seed', 100)
    assert self._min_test_seed <= self._max_test_seed
    self._next_test_seed = self._min_test_seed
    self._num_resets = 0

    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._env = FlatlandGymEnv(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        render=env_config.get('render'),
        regenerate_rail_on_reset=env_config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=env_config['regenerate_schedule_on_reset'])

    # Optional wrappers, applied in order around the base gym env.
    if env_config['observation'] == 'shortest_path':
        self._env = ShortestPathActionWrapper(self._env)
    if env_config.get('sparse_reward', False):
        self._env = SparseRewardWrapper(
            self._env,
            finished_reward=env_config.get('done_reward', 1),
            not_finished_reward=env_config.get('not_finished_reward', -1))
    if env_config.get('deadlock_reward', 0) != 0:
        self._env = DeadlockWrapper(
            self._env, deadlock_reward=env_config['deadlock_reward'])
    if env_config.get('resolve_deadlocks', False):
        deadlock_reward = env_config.get('deadlock_reward', 0)
        self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
    if env_config.get('skip_no_choice_cells', False):
        self._env = SkipNoChoiceCellsWrapper(
            self._env, env_config.get('accumulate_skipped_rewards', False))
    if env_config.get('available_actions_obs', False):
        self._env = AvailableActionsWrapper(self._env)
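# Example env_config for the seeded env above: every key shown is read in
# __init__; the concrete values are illustrative, and self._launch() may
# read additional generator keys not shown here.
seeded_env_config = {
    'observation': 'tree',
    'observation_config': {'max_depth': 2, 'shortest_path_max_depth': 30},
    'min_seed': 100,
    'max_seed': 199,
    'min_test_seed': 0,
    'max_test_seed': 100,
    'test': False,
    'regenerate_rail_on_reset': True,
    'regenerate_schedule_on_reset': True,
    'render': None,
}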
def get_env(config=None, rl=False):
    # NOTE: `seed` is expected to be defined at module level.
    n_agents = 16
    schedule_generator = sparse_schedule_generator(None)
    rail_generator = sparse_rail_generator(
        seed=seed,
        max_num_cities=3,
        grid_mode=False,
        max_rails_between_cities=2,
        max_rails_in_city=4,
    )

    if rl:
        # The RL agent consumes a combined "path" + "simple_meta" observation.
        obs_builder = make_obs("combined", {
            "path": None,
            "simple_meta": None
        }).builder()
    else:
        obs_builder = DummyObs()

    params = MalfunctionParameters(malfunction_rate=1 / 1000,
                                   max_duration=50,
                                   min_duration=20)
    malfunction_generator = ParamMalfunctionGen(params)

    env = RailEnv(
        width=28,
        height=28,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        number_of_agents=n_agents,
        malfunction_generator=malfunction_generator,
        obs_builder_object=obs_builder,
        remove_agents_at_target=True,
        random_seed=seed,
    )
    return env
def get_env(config=None, rl=False):
    # NOTE: `seed` is expected to be defined at module level.
    n_agents = 32
    schedule_generator = sparse_schedule_generator(None)
    rail_generator = sparse_rail_generator(
        seed=seed,
        max_num_cities=4,
        grid_mode=False,
        max_rails_between_cities=2,
        max_rails_in_city=4,
    )

    if rl:
        # The RL agent's observation is taken from the run config.
        obs_builder = make_obs(
            config["env_config"]['observation'],
            config["env_config"].get('observation_config')).builder()
    else:
        obs_builder = DummyObs()

    params = MalfunctionParameters(malfunction_rate=1 / 1000,
                                   max_duration=50,
                                   min_duration=20)
    malfunction_generator = ParamMalfunctionGen(params)

    env = RailEnv(
        width=32,
        height=32,
        rail_generator=rail_generator,
        schedule_generator=schedule_generator,
        number_of_agents=n_agents,
        malfunction_generator=malfunction_generator,
        obs_builder_object=obs_builder,
        remove_agents_at_target=True,
        random_seed=seed,
    )
    return env
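# Smoke test for get_env above, assuming the standard flatland-rl RailEnv API
# (reset() -> (obs, info), step(action_dict) -> (obs, rewards, done, info));
# the module-level `seed` must be defined before calling get_env.
import numpy as np

def smoke_test():
    env = get_env(rl=False)
    obs, info = env.reset()
    for _ in range(10):
        # Random actions from the 5-action rail action space (0..4).
        actions = {handle: np.random.randint(0, 5)
                   for handle in range(env.get_num_agents())}
        obs, rewards, done, info = env.step(actions)
        if done['__all__']:
            break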
from flatlander.envs.utils.gym_env import FlatlandGymEnv

parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="QMIX")
parser.add_argument("--num-cpus", type=int, default=0)
parser.add_argument("--as-test", action="store_true")
parser.add_argument("--torch", action="store_true")
parser.add_argument("--stop-timesteps", type=int, default=50000)

if __name__ == "__main__":
    args = parser.parse_args()

    grouping = {
        "group_1": [0, 1, 2, 3, 4],
    }
    obs_space = Tuple([
        make_obs("tree", {"max_depth": 2,
                          "shortest_path_max_depth": 30}).observation_space()
        for _ in range(5)
    ])
    act_space = Tuple([FlatlandGymEnv.action_space for _ in range(5)])

    register_env(
        "flatland_sparse_grouped",
        lambda config: FlatlandSparse(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))

    config = {
        "rollout_fragment_length": 50,
        "train_batch_size": 1000,
        "exploration_config": {
            "epsilon_timesteps": 5000,
            "final_epsilon": 0.05,