Esempio n. 1
0
    def step(self, action):
        '''
        Play a single move and advance the environment to the resulting state.

        The caller is responsible for making sure the correct player is
        moving; black goes first.

        :param action: one of
            - a (row, col) pair given as a tuple, list or numpy array; both
              coordinates must lie in [0, self.size),
            - None, which is translated to the flat index self.size**2
              (presumably the pass move -- confirm against GoGame),
            - anything else, which is forwarded unchanged as a flat index.
        :return: (observation, reward, done, info), where observation is a
            copy of the updated state.
        :raises AssertionError: if the game is already over or a coordinate
            is outside the board.
        '''
        assert not self.done

        # Normalise every accepted action format to one flat move index.
        if action is None:
            move = self.size**2
        elif isinstance(action, (tuple, list, np.ndarray)):
            row = action[0]
            assert 0 <= row < self.size
            col = action[1]
            assert 0 <= col < self.size
            move = self.size * row + col
        else:
            move = action

        # GoGame works on batches, so wrap the move in a one-element batch
        # and unwrap the single result afterwards.
        move_batch = np.array([move])
        batch_states, batch_group_maps = GoGame.get_batch_next_states(
            self.state, move_batch, self.group_map)
        new_state, new_group_map = batch_states[0], batch_group_maps[0]
        self.state = new_state
        self.group_map = new_group_map
        self.done = GoGame.get_game_ended(self.state)

        # Hand out a copy so the caller does not receive the internal array.
        observation = np.copy(self.state)
        reward = self.get_reward()
        done = self.done
        info = self.get_info()
        return observation, reward, done, info
Esempio n. 2
0
    def step_batch(self, state, action):
        '''
        Compute the outcome of one move applied to the given state.

        The caller is responsible for making sure the correct player is
        moving; black goes first. This method itself does not assign to any
        attribute of self: the successor is built from the `state` argument
        and only returned.

        NOTE(review): the group map passed to GoGame is self.group_map, not
        one derived from `state` -- confirm that the two are expected to
        match for every caller.

        :param state: the state the move is applied to.
        :param action: one of
            - a (row, col) pair given as a tuple, list or numpy array; both
              coordinates must lie in [0, self.size),
            - None, which is translated to the flat index self.size**2
              (presumably the pass move -- confirm against GoGame),
            - anything else, which is forwarded unchanged as a flat index.
        :return: (observation, reward, done, info), where observation is a
            copy of the successor state.
        :raises AssertionError: if self.done is set or a coordinate is
            outside the board.
        '''
        assert not self.done

        # Normalise every accepted action format to one flat move index.
        if action is None:
            move = self.size**2
        elif isinstance(action, (tuple, list, np.ndarray)):
            row = action[0]
            assert 0 <= row < self.size
            col = action[1]
            assert 0 <= col < self.size
            move = self.size * row + col
        else:
            move = action

        # GoGame works on batches, so wrap the move in a one-element batch
        # and unwrap the single result afterwards.
        move_batch = np.array([move])
        successor_states, successor_group_maps = GoGame.get_batch_next_states(
            state, move_batch, self.group_map)
        # Both batches are indexed, as before, although only the state is
        # used in the rest of this method.
        successor, _successor_group_map = (successor_states[0],
                                           successor_group_maps[0])
        finished = GoGame.get_game_ended(successor)

        observation = np.copy(successor)
        reward = self.get_reward_batch(successor, finished)
        info = self.get_info_batch(successor)
        return observation, reward, finished, info