Example #1
    def __init__(self, env_config) -> None:
        super().__init__()

        # TODO implement other generators
        assert env_config['generator'] == 'sparse_rail_generator'

        self._observation = make_obs(env_config['observation'],
                                     env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])

        if not hasattr(env_config, 'worker_index') or (
                env_config.worker_index == 0 and env_config.vector_index == 0):
            print("=" * 50)
            pprint(self._config)
            print("=" * 50)

        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            # render=env_config['render'], # TODO need to fix gl compatibility first
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'])
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(self._env)
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env)
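For reference, a minimal env_config accepted by this constructor might look as follows; the key names are the ones read above, while the values are illustrative assumptions:

env_config = {
    'generator': 'sparse_rail_generator',     # asserted by the constructor
    'generator_config': 'small_v0',           # resolved by get_generator_config (name assumed)
    'observation': 'tree',                    # resolved by make_obs (name assumed)
    'observation_config': {'max_depth': 2},   # optional, forwarded to make_obs
    'skip_no_choice_cells': True,             # optional wrapper toggles
    'available_actions_obs': False,
}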
Example #2
    def __init__(self, env_config):
        self._observation = make_obs(env_config['observation'],
                                     env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])

        # Overwrites with env_config seed if it exists
        if env_config.get('seed'):
            self._config['seed'] = env_config.get('seed')

        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'])
        if env_config['observation'] == 'shortest_path':
            self._env = ShortestPathActionWrapper(self._env)
        if env_config.get('sparse_reward', False):
            self._env = SparseRewardWrapper(
                self._env,
                finished_reward=env_config.get('done_reward', 1),
                not_finished_reward=env_config.get('not_finished_reward', -1))
        if env_config.get('deadlock_reward', 0) != 0:
            self._env = DeadlockWrapper(
                self._env, deadlock_reward=env_config['deadlock_reward'])
        if env_config.get('resolve_deadlocks', False):
            deadlock_reward = env_config.get('deadlock_reward', 0)
            self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(
                self._env, env_config.get('accumulate_skipped_rewards', False))
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env)
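This variant stacks reward-shaping wrappers on top of the base env (plus ShortestPathActionWrapper when observation == 'shortest_path'); the order matters, since each wrapper receives the env produced by the previous one. Continuing the config sketch above, these are the additional keys it reads; the values are assumptions:

env_config.update({
    'seed': 42,                          # overrides the generator-config seed
    'sparse_reward': True,               # -> SparseRewardWrapper
    'done_reward': 1,
    'not_finished_reward': -1,
    'deadlock_reward': -5,               # nonzero -> DeadlockWrapper
    'resolve_deadlocks': False,          # -> DeadlockResolutionWrapper
    'accumulate_skipped_rewards': True,  # for SkipNoChoiceCellsWrapper
})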
Example #3
    def __init__(self, env_config) -> None:
        super().__init__()
        self._env_config = env_config
        self._test = env_config.get('test', False)
        self._min_seed = env_config['min_seed']
        self._max_seed = env_config['max_seed']
        assert self._min_seed <= self._max_seed
        self._min_test_seed = env_config.get('min_test_seed', 0)
        self._max_test_seed = env_config.get('max_test_seed', 100)
        assert self._min_test_seed <= self._max_test_seed
        self._next_test_seed = self._min_test_seed
        self._num_resets = 0
        self._observation = make_obs(env_config['observation'], env_config.get('observation_config'))
        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            render=env_config.get('render'),
            regenerate_rail_on_reset=env_config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=env_config['regenerate_schedule_on_reset']
        )
        if env_config['observation'] == 'shortest_path':
            self._env = ShortestPathActionWrapper(self._env)
        if env_config.get('sparse_reward', False):
            self._env = SparseRewardWrapper(self._env, finished_reward=env_config.get('done_reward', 1),
                                            not_finished_reward=env_config.get('not_finished_reward', -1))
        if env_config.get('deadlock_reward', 0) != 0:
            self._env = DeadlockWrapper(self._env, deadlock_reward=env_config['deadlock_reward'])
        if env_config.get('resolve_deadlocks', False):
            deadlock_reward = env_config.get('deadlock_reward', 0)
            self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(self._env, env_config.get('accumulate_skipped_rewards', False))
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env)
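This variant draws its episode seeds from a configured range instead of a generator config. A minimal sketch of the keys it reads; the key names come from the code above, the values are illustrative assumptions:

env_config = {
    'observation': 'tree',                 # observation name is assumed
    'observation_config': {},              # forwarded to make_obs
    'test': False,                         # True -> deterministic test seeds
    'min_seed': 0,                         # training seed range (required)
    'max_seed': 100,
    'min_test_seed': 0,                    # defaults mirrored from .get(...)
    'max_test_seed': 100,
    'render': None,
    'regenerate_rail_on_reset': True,
    'regenerate_schedule_on_reset': True,
}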
Example #4
    def __init__(self, env_config) -> None:
        super().__init__()

        # TODO implement other generators
        assert env_config['generator'] == 'sparse_rail_generator'
        self._env_config = env_config

        self._observation = make_obs(env_config['observation'],
                                     env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])

        # Overwrites with env_config seed if it exists
        if env_config.get('seed'):
            self._config['seed'] = env_config.get('seed')

        if not hasattr(env_config, 'worker_index') or (
                env_config.worker_index == 0 and env_config.vector_index == 0):
            print("=" * 50)
            pprint(self._config)
            print("=" * 50)

        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            render=env_config.get('render'),
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'])
        if env_config['observation'] == 'shortest_path':
            self._env = ShortestPathActionWrapper(self._env)
        if env_config.get('sparse_reward', False):
            self._env = SparseRewardWrapper(
                self._env,
                finished_reward=env_config.get('done_reward', 1),
                not_finished_reward=env_config.get('not_finished_reward', -1))
        if env_config.get('deadlock_reward', 0) != 0:
            self._env = DeadlockWrapper(
                self._env, deadlock_reward=env_config['deadlock_reward'])
        if env_config.get('resolve_deadlocks', False):
            deadlock_reward = env_config.get('deadlock_reward', 0)
            self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(
                self._env,
                env_config.get('accumulate_skipped_rewards', False),
                discounting=env_config.get('discounting', 1.))
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(
                self._env, env_config.get('allow_noop', True))
Example #5
    def __init__(self, env_config) -> None:
        super().__init__()
        self._env_config = env_config
        self._test = env_config.get('test', False)
        self._min_seed = env_config['min_seed']
        self._max_seed = env_config['max_seed']
        assert self._min_seed <= self._max_seed
        self._min_test_seed = env_config.get('min_test_seed', 0)
        self._max_test_seed = env_config.get('max_test_seed', 100)
        assert self._min_test_seed <= self._max_test_seed
        self._next_test_seed = self._min_test_seed
        self._num_resets = 0
        self._observation = make_obs(env_config['observation'], env_config.get('observation_config'))
        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            # render=env_config['render'], # TODO need to fix gl compatibility first
            regenerate_rail_on_reset=env_config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=env_config['regenerate_schedule_on_reset']
        )
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(self._env)
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env)
Example #6
class FlatlandRandomSparseSmall(MultiAgentEnv):
    def __init__(self, env_config) -> None:
        super().__init__()
        self._env_config = env_config
        self._test = env_config.get('test', False)
        self._min_seed = env_config['min_seed']
        self._max_seed = env_config['max_seed']
        assert self._min_seed <= self._max_seed
        self._min_test_seed = env_config.get('min_test_seed', 0)
        self._max_test_seed = env_config.get('max_test_seed', 100)
        assert self._min_test_seed <= self._max_test_seed
        self._next_test_seed = self._min_test_seed
        self._num_resets = 0
        self._observation = make_obs(env_config['observation'], env_config.get('observation_config'))
        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            # render=env_config['render'], # TODO need to fix gl compatibility first
            regenerate_rail_on_reset=env_config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=env_config['regenerate_schedule_on_reset']
        )
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(self._env)
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env)

    @property
    def observation_space(self) -> gym.spaces.Space:
        return self._observation.observation_space()

    @property
    def action_space(self) -> gym.spaces.Space:
        return self._env.action_space

    def _generate_random_seed(self):
        random.seed(None)
        return random.randint(self._min_seed, self._max_seed)

    def _launch(self, max_tries=5):
        env = None
        num_tries = 0
        while env is None and num_tries < max_tries:
            if self._test:
                random_seed = self._next_test_seed
                rel_next_seed = self._next_test_seed - self._min_test_seed
                rel_max_seed = self._max_test_seed - self._min_test_seed
                self._next_test_seed = self._min_test_seed + ((rel_next_seed + 1) % (rel_max_seed + 1))  # inclusive max
            else:
                random_seed = self._generate_random_seed()
            random_seed = random_seed * 19997 + 997  # backwards consistency
            env = random_sparse_env_small(random_seed=random_seed, max_width=45, max_height=45,
                                          observation_builder=self._observation.builder())
            num_tries += 1
        if env is None:
            raise RuntimeError(f"Unable to launch env within {max_tries} tries.")
        return env

    def step(self, action_dict):
        return self._env.step(action_dict)

    def reset(self):
        if self._test or (
                self._env_config['reset_env_freq'] is not None
                and self._num_resets > 0
                and self._num_resets % self._env_config['reset_env_freq'] == 0
        ):
            self._env.env = self._launch()
        self._num_resets += 1
        return self._env.reset(random_seed=self._next_test_seed if self._test else self._generate_random_seed())
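In test mode, _launch steps deterministically through the test-seed range using modular arithmetic, then applies the affine map seed * 19997 + 997 kept for backwards consistency. A standalone sketch of the cycling formula (the function name and demo values are ours; the formula is the one in _launch):

def next_test_seed(current, min_seed, max_seed):
    # Advance to the next test seed, wrapping inclusively at max_seed.
    rel = current - min_seed
    return min_seed + ((rel + 1) % (max_seed - min_seed + 1))

# With min_seed=0 and max_seed=2 the sequence wraps: 0, 1, 2, 0, ...
seed = 0
for _ in range(4):
    print(seed)
    seed = next_test_seed(seed, 0, 2)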
Example #7
class FlatlandSingle(gym.Env):
    def render(self, mode='human'):
        pass

    def __init__(self, env_config):
        self._observation = make_obs(env_config['observation'],
                                     env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])

        # Overwrites with env_config seed if it exists
        if env_config.get('seed'):
            self._config['seed'] = env_config.get('seed')

        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'])
        if env_config['observation'] == 'shortest_path':
            self._env = ShortestPathActionWrapper(self._env)
        if env_config.get('sparse_reward', False):
            self._env = SparseRewardWrapper(
                self._env,
                finished_reward=env_config.get('done_reward', 1),
                not_finished_reward=env_config.get('not_finished_reward', -1))
        if env_config.get('deadlock_reward', 0) != 0:
            self._env = DeadlockWrapper(
                self._env, deadlock_reward=env_config['deadlock_reward'])
        if env_config.get('resolve_deadlocks', False):
            deadlock_reward = env_config.get('deadlock_reward', 0)
            self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(
                self._env, env_config.get('accumulate_skipped_rewards', False))
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env)

    def _launch(self):
        rail_generator = sparse_rail_generator(
            seed=self._config['seed'],
            max_num_cities=self._config['max_num_cities'],
            grid_mode=self._config['grid_mode'],
            max_rails_between_cities=self._config['max_rails_between_cities'],
            max_rails_in_city=self._config['max_rails_in_city'])

        malfunction_generator = no_malfunction_generator()
        if {
                'malfunction_rate', 'malfunction_min_duration',
                'malfunction_max_duration'
        } <= self._config.keys():
            stochastic_data = {
                'malfunction_rate': self._config['malfunction_rate'],
                'min_duration': self._config['malfunction_min_duration'],
                'max_duration': self._config['malfunction_max_duration']
            }
            malfunction_generator = malfunction_from_params(stochastic_data)

        speed_ratio_map = None
        if 'speed_ratio_map' in self._config:
            speed_ratio_map = {
                float(k): float(v)
                for k, v in self._config['speed_ratio_map'].items()
            }
        schedule_generator = sparse_schedule_generator(speed_ratio_map)

        env = None
        try:
            env = RailEnv(
                width=self._config['width'],
                height=self._config['height'],
                rail_generator=rail_generator,
                schedule_generator=schedule_generator,
                number_of_agents=self._config['number_of_agents'],
                malfunction_generator_and_process_data=malfunction_generator,
                obs_builder_object=self._observation.builder(),
                remove_agents_at_target=False,
                random_seed=self._config['seed'])

            env.reset()
        except ValueError as e:
            logging.error("=" * 50)
            logging.error(f"Error while creating env: {e}")
            logging.error("=" * 50)

        return env

    def step(self, action_list):
        # FlatlandGymEnv expects a dict keyed by agent handle.
        action_dict = {i: action for i, action in enumerate(action_list)}

        step_r = self._env.step(action_dict)

        return StepOutput(obs=list(step_r.obs.values()),
                          reward=np.sum(list(step_r.reward.values())),
                          done=all(step_r.done.values()),
                          info=step_r.info[0])

    def reset(self):
        obs_dict = self._env.reset()
        return list(obs_dict.values())

    @property
    def observation_space(self) -> gym.spaces.Space:
        observation_space = self._observation.observation_space()

        if isinstance(observation_space, gym.spaces.Box):
            return gym.spaces.Box(low=-np.inf,
                                  high=np.inf,
                                  shape=(
                                      self._config['number_of_agents'],
                                      *observation_space.shape,
                                  ))
        elif isinstance(observation_space, gym.spaces.Tuple):
            spaces = observation_space.spaces * self._config['number_of_agents']
            return gym.spaces.Tuple(spaces)
        else:
            raise ValueError("Unhandled space:", observation_space.__class__)

    @property
    def action_space(self) -> gym.spaces.Space:
        return gym.spaces.MultiDiscrete([5] * self._config['number_of_agents'])
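FlatlandSingle adapts the multi-agent env to the single-agent Gym interface: reset returns a list with one observation per agent, and step takes one action per agent (MultiDiscrete of 5 actions each) and returns the summed reward and an all-agents-done flag. A hedged usage sketch, assuming env_config is valid and StepOutput unpacks as (obs, reward, done, info):

env = FlatlandSingle(env_config)

obs = env.reset()                            # one observation per agent
actions = env.action_space.sample()          # one of 5 actions per agent
obs, reward, done, info = env.step(actions)  # reward summed over agents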
Example #8
class FlatlandSparse(MultiAgentEnv):
    def __init__(self, env_config) -> None:
        super().__init__()

        # TODO implement other generators
        assert env_config['generator'] == 'sparse_rail_generator'

        self._observation = make_obs(env_config['observation'],
                                     env_config.get('observation_config'))
        self._config = get_generator_config(env_config['generator_config'])

        if not hasattr(env_config, 'worker_index') or (
                env_config.worker_index == 0 and env_config.vector_index == 0):
            print("=" * 50)
            pprint(self._config)
            print("=" * 50)

        self._env = FlatlandGymEnv(
            rail_env=self._launch(),
            observation_space=self._observation.observation_space(),
            # render=env_config['render'], # TODO need to fix gl compatibility first
            regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
            regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'])
        if env_config.get('skip_no_choice_cells', False):
            self._env = SkipNoChoiceCellsWrapper(self._env)
        if env_config.get('available_actions_obs', False):
            self._env = AvailableActionsWrapper(self._env)

    @property
    def observation_space(self) -> gym.spaces.Space:
        return self._env.observation_space

    @property
    def action_space(self) -> gym.spaces.Space:
        return self._env.action_space

    def _launch(self):
        rail_generator = sparse_rail_generator(
            seed=self._config['seed'],
            max_num_cities=self._config['max_num_cities'],
            grid_mode=self._config['grid_mode'],
            max_rails_between_cities=self._config['max_rails_between_cities'],
            max_rails_in_city=self._config['max_rails_in_city'])

        malfunction_generator = no_malfunction_generator()
        if {
                'malfunction_rate', 'malfunction_min_duration',
                'malfunction_max_duration'
        } <= self._config.keys():
            stochastic_data = {
                'malfunction_rate': self._config['malfunction_rate'],
                'min_duration': self._config['malfunction_min_duration'],
                'max_duration': self._config['malfunction_max_duration']
            }
            malfunction_generator = malfunction_from_params(stochastic_data)

        speed_ratio_map = None
        if 'speed_ratio_map' in self._config:
            speed_ratio_map = {
                float(k): float(v)
                for k, v in self._config['speed_ratio_map'].items()
            }
        schedule_generator = sparse_schedule_generator(speed_ratio_map)

        env = None
        try:
            env = RailEnv(
                width=self._config['width'],
                height=self._config['height'],
                rail_generator=rail_generator,
                schedule_generator=schedule_generator,
                number_of_agents=self._config['number_of_agents'],
                malfunction_generator_and_process_data=malfunction_generator,
                obs_builder_object=self._observation.builder(),
                remove_agents_at_target=False,
                random_seed=self._config['seed'])

            env.reset()
        except ValueError as e:
            logging.error("=" * 50)
            logging.error(f"Error while creating env: {e}")
            logging.error("=" * 50)

        return env

    def step(self, action_dict):
        return self._env.step(action_dict)

    def reset(self):
        return self._env.reset()
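FlatlandSparse subclasses RLlib's MultiAgentEnv, so a typical way to use it is to register a creator with Ray Tune. A minimal sketch; the registry name is arbitrary and the wiring is an assumption, not something shown in the examples:

from ray.tune.registry import register_env

# RLlib passes the experiment's env_config dict to the creator.
register_env('flatland_sparse', lambda env_config: FlatlandSparse(env_config))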