def __init__(self, env_config) -> None:
    super().__init__()
    # TODO implement other generators
    assert env_config['generator'] == 'sparse_rail_generator'

    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._config = get_generator_config(env_config['generator_config'])

    # Print the generator config only once, from the first rollout worker.
    if not hasattr(env_config, 'worker_index') or (
            env_config.worker_index == 0 and env_config.vector_index == 0):
        print("=" * 50)
        pprint(self._config)
        print("=" * 50)

    self._env = FlatlandGymEnv(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        # render=env_config['render'],  # TODO need to fix gl compatibility first
        regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'])

    if env_config.get('skip_no_choice_cells', False):
        self._env = SkipNoChoiceCellsWrapper(self._env)
    if env_config.get('available_actions_obs', False):
        self._env = AvailableActionsWrapper(self._env)
def __init__(self, env_config):
    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._config = get_generator_config(env_config['generator_config'])

    # Overwrite the generator seed with env_config['seed'] if one is given.
    if env_config.get('seed'):
        self._config['seed'] = env_config.get('seed')

    self._env = FlatlandGymEnv(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'])

    if env_config['observation'] == 'shortest_path':
        self._env = ShortestPathActionWrapper(self._env)
    if env_config.get('sparse_reward', False):
        self._env = SparseRewardWrapper(
            self._env,
            finished_reward=env_config.get('done_reward', 1),
            not_finished_reward=env_config.get('not_finished_reward', -1))
    if env_config.get('deadlock_reward', 0) != 0:
        self._env = DeadlockWrapper(
            self._env, deadlock_reward=env_config['deadlock_reward'])
    if env_config.get('resolve_deadlocks', False):
        deadlock_reward = env_config.get('deadlock_reward', 0)
        self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
    if env_config.get('skip_no_choice_cells', False):
        self._env = SkipNoChoiceCellsWrapper(
            self._env, env_config.get('accumulate_skipped_rewards', False))
    if env_config.get('available_actions_obs', False):
        self._env = AvailableActionsWrapper(self._env)
def __init__(self, env_config) -> None:
    super().__init__()
    self._env_config = env_config

    self._test = env_config.get('test', False)
    self._min_seed = env_config['min_seed']
    self._max_seed = env_config['max_seed']
    assert self._min_seed <= self._max_seed
    self._min_test_seed = env_config.get('min_test_seed', 0)
    self._max_test_seed = env_config.get('max_test_seed', 100)
    assert self._min_test_seed <= self._max_test_seed
    self._next_test_seed = self._min_test_seed
    self._num_resets = 0

    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._env = FlatlandGymEnv(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        render=env_config.get('render'),
        regenerate_rail_on_reset=env_config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=env_config['regenerate_schedule_on_reset'])

    if env_config['observation'] == 'shortest_path':
        self._env = ShortestPathActionWrapper(self._env)
    if env_config.get('sparse_reward', False):
        self._env = SparseRewardWrapper(
            self._env,
            finished_reward=env_config.get('done_reward', 1),
            not_finished_reward=env_config.get('not_finished_reward', -1))
    if env_config.get('deadlock_reward', 0) != 0:
        self._env = DeadlockWrapper(
            self._env, deadlock_reward=env_config['deadlock_reward'])
    if env_config.get('resolve_deadlocks', False):
        deadlock_reward = env_config.get('deadlock_reward', 0)
        self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
    if env_config.get('skip_no_choice_cells', False):
        self._env = SkipNoChoiceCellsWrapper(
            self._env, env_config.get('accumulate_skipped_rewards', False))
    if env_config.get('available_actions_obs', False):
        self._env = AvailableActionsWrapper(self._env)
def __init__(self, env_config) -> None:
    super().__init__()
    # TODO implement other generators
    assert env_config['generator'] == 'sparse_rail_generator'
    self._env_config = env_config

    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._config = get_generator_config(env_config['generator_config'])

    # Overwrite the generator seed with env_config['seed'] if one is given.
    if env_config.get('seed'):
        self._config['seed'] = env_config.get('seed')

    # Print the generator config only once, from the first rollout worker.
    if not hasattr(env_config, 'worker_index') or (
            env_config.worker_index == 0 and env_config.vector_index == 0):
        print("=" * 50)
        pprint(self._config)
        print("=" * 50)

    self._env = FlatlandGymEnv(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        render=env_config.get('render'),
        regenerate_rail_on_reset=self._config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=self._config['regenerate_schedule_on_reset'])

    if env_config['observation'] == 'shortest_path':
        self._env = ShortestPathActionWrapper(self._env)
    if env_config.get('sparse_reward', False):
        self._env = SparseRewardWrapper(
            self._env,
            finished_reward=env_config.get('done_reward', 1),
            not_finished_reward=env_config.get('not_finished_reward', -1))
    if env_config.get('deadlock_reward', 0) != 0:
        self._env = DeadlockWrapper(
            self._env, deadlock_reward=env_config['deadlock_reward'])
    if env_config.get('resolve_deadlocks', False):
        deadlock_reward = env_config.get('deadlock_reward', 0)
        self._env = DeadlockResolutionWrapper(self._env, deadlock_reward)
    if env_config.get('skip_no_choice_cells', False):
        self._env = SkipNoChoiceCellsWrapper(
            self._env,
            env_config.get('accumulate_skipped_rewards', False),
            discounting=env_config.get('discounting', 1.))
    if env_config.get('available_actions_obs', False):
        self._env = AvailableActionsWrapper(
            self._env, env_config.get('allow_noop', True))
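# Illustrative sketch only: a hypothetical env_config dict covering the keys the
# sparse-generator constructor above reads. The key names are taken from the lookups
# in that code; the concrete values and the 'small_v0' generator config name are assumptions.
EXAMPLE_SPARSE_ENV_CONFIG = {
    'generator': 'sparse_rail_generator',
    'generator_config': 'small_v0',           # assumed name resolved by get_generator_config
    'observation': 'tree',                    # assumed; 'shortest_path' would enable ShortestPathActionWrapper
    'observation_config': {'max_depth': 2},   # assumed shape; forwarded to make_obs
    'seed': 42,                               # optional: overrides the generator config seed
    'render': None,
    'sparse_reward': True,
    'done_reward': 1,
    'not_finished_reward': -1,
    'deadlock_reward': -1,                    # any non-zero value enables DeadlockWrapper
    'resolve_deadlocks': False,
    'skip_no_choice_cells': True,
    'accumulate_skipped_rewards': True,
    'discounting': 1.,
    'available_actions_obs': False,
    'allow_noop': True,
}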
def __init__(self, env_config) -> None:
    super().__init__()
    self._env_config = env_config

    self._test = env_config.get('test', False)
    self._min_seed = env_config['min_seed']
    self._max_seed = env_config['max_seed']
    assert self._min_seed <= self._max_seed
    self._min_test_seed = env_config.get('min_test_seed', 0)
    self._max_test_seed = env_config.get('max_test_seed', 100)
    assert self._min_test_seed <= self._max_test_seed
    self._next_test_seed = self._min_test_seed
    self._num_resets = 0

    self._observation = make_obs(env_config['observation'],
                                 env_config.get('observation_config'))
    self._env = FlatlandGymEnv(
        rail_env=self._launch(),
        observation_space=self._observation.observation_space(),
        # render=env_config['render'],  # TODO need to fix gl compatibility first
        regenerate_rail_on_reset=env_config['regenerate_rail_on_reset'],
        regenerate_schedule_on_reset=env_config['regenerate_schedule_on_reset'])

    if env_config.get('skip_no_choice_cells', False):
        self._env = SkipNoChoiceCellsWrapper(self._env)
    if env_config.get('available_actions_obs', False):
        self._env = AvailableActionsWrapper(self._env)
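# Illustrative sketch only: a hypothetical env_config for the seed-range variants above.
# Key names come from the constructors' lookups; the values are assumptions.
EXAMPLE_SEED_RANGE_ENV_CONFIG = {
    'observation': 'tree',                     # assumed observation name passed to make_obs
    'observation_config': {'max_depth': 2},    # assumed shape; forwarded to make_obs
    'min_seed': 0,                             # training seeds presumably drawn from [min_seed, max_seed]
    'max_seed': 100,
    'test': False,                             # presumably switches to the test seed range (inferred from the fields above)
    'min_test_seed': 0,
    'max_test_seed': 100,
    'regenerate_rail_on_reset': True,
    'regenerate_schedule_on_reset': True,
    'skip_no_choice_cells': False,
    'available_actions_obs': False,
}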