コード例 #1
0
    def __init__(self, matrix):
        self.maze = Woods(matrix)
        self.pos_x = None
        self.pos_y = None

        self.action_space = spaces.Discrete(8)
        self.observation_space = WoodsObservationSpace(8)
コード例 #2
0
ファイル: test_woods.py プロジェクト: Metron45/openai-envs
    def test_should_detect_reward(self):
        # given
        woods = Woods(self.SCHEMA)

        # then
        assert woods.is_reward(0, 0) is False
        assert woods.is_reward(0, 1) is False
        assert woods.is_reward(2, 1) is True
        assert woods.is_reward(1, 2) is True
コード例 #3
0
ファイル: test_woods.py プロジェクト: Metron45/openai-envs
    def test_should_calculate_perception(self):
        # given
        woods = Woods(self.SCHEMA)

        # when & then
        assert list("F..GOO..") == woods.perception(1, 0)
        assert list("...O.G..") == woods.perception(3, 0)
        assert list("..G.F.O.") == woods.perception(1, 1)
        assert list("OOF.....") == woods.perception(0, 2)
コード例 #4
0
ファイル: test_woods.py プロジェクト: Metron45/openai-envs
    def test_should_calculate_boundaries(self):
        # when
        woods = Woods(self.SCHEMA)

        # then
        assert woods.max_x == 4
        assert woods.max_y == 3
コード例 #5
0
ファイル: test_woods.py プロジェクト: Metron45/openai-envs
    def test_should_get_insertion_coordinates(self):
        # given
        woods = Woods(self.SCHEMA)

        # when
        cords = woods.possible_insertion_cords

        # then
        assert len(cords) == 8
        assert (0, 0) in cords
        assert (3, 0) in cords
        assert (0, 1) not in cords
コード例 #6
0
ファイル: test_woods.py プロジェクト: Metron45/openai-envs
    def test_should_raise_error_with_invalid_cords(self):
        # given
        woods = Woods(self.SCHEMA)

        # then
        woods.perception(0, 2)

        woods.perception(1, 1)

        with pytest.raises(ValueError):
            # negative value
            woods.perception(-1, 0)

        with pytest.raises(ValueError):
            # x outside range
            woods.perception(4, 1)

        with pytest.raises(ValueError):
            # y outside range
            woods.perception(1, 3)
コード例 #7
0
class AbstractWoods(gym.Env):

    def __init__(self, matrix):
        self.maze = Woods(matrix)
        self.pos_x = None
        self.pos_y = None

        self.action_space = spaces.Discrete(8)
        self.observation_space = WoodsObservationSpace(8)

    def reset(self):
        logging.debug('Resetting the environment')
        self._insert_animat()
        return self._observe()

    def step(self, action):
        previous_observation = self._observe()
        self._take_action(action, previous_observation)

        observation = self._observe()
        reward = self._get_reward()
        episode_over = self._is_over()

        return observation, reward, episode_over, {}

    def render(self, mode='human'):
        if mode == 'human':
            snapshot = np.copy(self.maze.matrix)
            snapshot[self.pos_y, self.pos_x] = 'X'

            sys.stdout.write("\n")
            for row in snapshot:
                sys.stdout.write(" ".join(self._render(el) for el in row))
                sys.stdout.write("\n")
            sys.stdout.flush()

        else:
            super(AbstractWoods, self).render(mode=mode)

    def _take_action(self, action, observation):
        """Executes the action inside the maze"""
        animat_moved = False
        action_type = ACTION_LOOKUP[action]

        if action_type == "N" and not self.is_wall(observation[0]):
            self.pos_y -= 1
            animat_moved = True

            if self.pos_y < 0:
                self.pos_y = self.maze.max_y - 1

        if action_type == 'NE' and not self.is_wall(observation[1]):
            self.pos_x += 1
            self.pos_y -= 1
            animat_moved = True

            if self.pos_y < 0:
                self.pos_y = self.maze.max_y - 1

            if self.pos_x >= self.maze.max_x:
                self.pos_x = 0

        if action_type == "E" and not self.is_wall(observation[2]):
            self.pos_x += 1
            animat_moved = True

            if self.pos_x >= self.maze.max_x:
                self.pos_x = 0

        if action_type == 'SE' and not self.is_wall(observation[3]):
            self.pos_x += 1
            self.pos_y += 1
            animat_moved = True

            if self.pos_x >= self.maze.max_x:
                self.pos_x = 0

            if self.pos_y >= self.maze.max_y:
                self.pos_y = 0

        if action_type == "S" and not self.is_wall(observation[4]):
            self.pos_y += 1
            animat_moved = True

            if self.pos_y >= self.maze.max_y:
                self.pos_y = 0

        if action_type == 'SW' and not self.is_wall(observation[5]):
            self.pos_x -= 1
            self.pos_y += 1
            animat_moved = True

            if self.pos_x < 0:
                self.pos_x = self.maze.max_x - 1

            if self.pos_y >= self.maze.max_y:
                self.pos_y = 0

        if action_type == "W" and not self.is_wall(observation[6]):
            self.pos_x -= 1
            animat_moved = True

            if self.pos_x < 0:
                self.pos_x = self.maze.max_x - 1

        if action_type == 'NW' and not self.is_wall(observation[7]):
            self.pos_x -= 1
            self.pos_y -= 1
            animat_moved = True

            if self.pos_x < 0:
                self.pos_x = self.maze.max_x - 1

            if self.pos_y < 0:
                self.pos_y = self.maze.max_y - 1

        return animat_moved

    def _insert_animat(self):
        possible_coords = self.maze.possible_insertion_cords

        starting_position = random.choice(possible_coords)
        self.pos_x = starting_position[0]
        self.pos_y = starting_position[1]

    def _observe(self):
        return self.maze.perception(self.pos_x, self.pos_y)

    def _perception(self, posx, posy):
        return self.maze.perception(posx, posy)

    def _get_reward(self):
        if self.maze.is_reward(self.pos_x, self.pos_y):
            return 1000

        return 0

    def _is_over(self):
        return self.maze.is_reward(self.pos_x, self.pos_y)

    @staticmethod
    def is_wall(obs):
        return obs in ['O', 'Q']

    @staticmethod
    def _render(el):
        if el in ('O', 'Q'):
            return utils.colorize('■', 'gray')
        elif el == '.':
            return utils.colorize('□', 'white')
        elif el in ('F', 'G'):
            return utils.colorize('$', 'yellow')
        elif el == '*':
            return utils.colorize('A', 'red')
        else:
            return utils.colorize(el, 'cyan')

    def _state_action(self):
        """
        Return states and possible actions in each of them
        """
        mapping = {}

        for x, y in self.maze.possible_insertion_cords:
            [n, ne, e, se, s, sw, w, nw] = self.maze.perception(x, y)
            key = (x, y)
            mapping[key] = []

            actions_perceptions = {
                'N': n,
                'NE': ne,
                'E': e,
                'SE': se,
                'S': s,
                'SW': sw,
                'W': w,
                'NW': nw
            }

            for action, perception in actions_perceptions.items():
                if not self.is_wall(perception):
                    mapping[key].append(find_action_by_direction(action))

        # Cast (int, int) key to (str, str)
        mapping = {(str(k[0]), str(k[1])): v for k, v in mapping.items()}

        return mapping