Esempio n. 1
0
class GridWorldEnv(gym.Env):
    """Grid-world environment backed by a ``Viewer``.

    The ``Viewer`` owns the agent: this class forwards actions to
    ``viewer.move_agent``, reads observations from ``viewer.get_state`` and
    ends the episode when ``viewer.is_on_goal`` reports true. The reward is
    1 on the step that reaches the goal and 0 otherwise.

    The default grid is 16 x 9 cells with a cell size of 10; use
    ``set_grid_size`` to change the grid dimensions.
    """

    metadata = {
        'render.modes': ['human']
    }

    def __init__(self):
        """Build the default 16 x 9 grid, seed the RNG and reset the agent."""
        self.width = 16
        self.height = 9
        self._cell_size = 10

        self.action_space = spaces.Discrete(4)
        self.observation_space = self._make_observation_space()

        self.viewer = Viewer(width=self.width, height=self.height, cell_size=self._cell_size)

        self._seed()
        self.reset()

    def _make_observation_space(self):
        """Return the observation space for the current grid dimensions.

        ``spaces.Box`` takes ``(low, high, shape)``; the arguments are passed
        by keyword so the grid dimensions end up in ``shape`` rather than
        being mistaken for the bounds.
        """
        # NOTE(review): assumes viewer.get_state() yields an array of shape
        # (height * cell_size, width * cell_size, 1) with values in 0..255 --
        # confirm against Viewer and adjust bounds/shape if it differs.
        return spaces.Box(
            low=0,
            high=255,
            shape=(self.height * self._cell_size, self.width * self._cell_size, 1)
        )

    def _seed(self, seed=None):
        """Seed this environment's random generator.

        Returns:
            A one-element list holding the seed that was actually used.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, action):
        """Apply ``action`` to the agent and report the outcome.

        Args:
            action: A member of ``self.action_space``.

        Returns:
            A ``(state, reward, done, info)`` tuple: the viewer's state after
            the move, 1 if the agent is on the goal else 0, whether the agent
            is on the goal, and an empty info dict.
        """
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))
        self.viewer.move_agent(action)
        self.state = self.viewer.get_state()
        done = self.viewer.is_on_goal()
        reward = 1 if done else 0
        return self.state, reward, done, {}

    def _reset(self):
        """Reset the agent through the viewer and return the new state."""
        self.viewer.reset_agent()
        self.state = self.viewer.get_state()
        return self.state

    def _render(self, mode='human', close=False):
        """Render the environment, or close the viewer when ``close`` is true.

        Args:
            mode: Render mode; not inspected here.
            close: When true, close and drop the viewer instead of rendering.

        Returns:
            Whatever ``viewer.render()`` returns, or ``None`` when closing.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        return self.viewer.render()

    def set_grid_size(self, width, height):
        """Resize the grid, rebuild the viewer and reset the agent.

        Args:
            width: New grid width, in cells.
            height: New grid height, in cells.
        """
        self.width = width
        self.height = height
        # Release the previous viewer before replacing it so it is not leaked.
        if self.viewer is not None:
            self.viewer.close()
        self.viewer = Viewer(height=self.height, width=self.width, cell_size=self._cell_size)
        # Keep the advertised observation space in sync with the new size.
        self.observation_space = self._make_observation_space()
        self.reset()