def step(self, action):
    '''
    Apply one move to the environment and advance its stored state.

    The caller is trusted to be the player whose turn it is; black
    goes first.

    `action` may be:
      * a tuple, list or np.ndarray whose first two entries are
        (row, col), each required to lie in [0, self.size);
      * None, which is mapped to the index self.size**2
        (presumably the pass move -- confirm against GoGame);
      * anything else, which is forwarded unchanged as an
        already-flattened action index.

    Updates self.state, self.group_map and self.done.

    return observation (a copy of the new state), reward, done, info
    '''
    assert not self.done

    if isinstance(action, (tuple, list, np.ndarray)):
        # Flatten the 2-D board coordinate in row-major order.
        row = action[0]
        assert 0 <= row < self.size
        col = action[1]
        assert 0 <= col < self.size
        action = self.size * row + col
    elif action is None:
        action = self.size**2

    # GoGame works on batches, so wrap the single action and unwrap
    # the single result afterwards.
    batch_states, batch_group_maps = GoGame.get_batch_next_states(
        self.state, np.array([action]), self.group_map)
    self.state = batch_states[0]
    self.group_map = batch_group_maps[0]
    self.done = GoGame.get_game_ended(self.state)

    # Hand back a copy so callers cannot mutate the internal state.
    observation = np.copy(self.state)
    reward = self.get_reward()
    done = self.done
    info = self.get_info()
    return observation, reward, done, info
def step_batch(self, state, action):
    '''
    Compute the result of one move from `state` without modifying the
    environment's own state, group map or done flag.

    The caller is trusted to be the player whose turn it is; black
    goes first.

    `action` may be:
      * a tuple, list or np.ndarray whose first two entries are
        (row, col), each required to lie in [0, self.size);
      * None, which is mapped to the index self.size**2
        (presumably the pass move -- confirm against GoGame);
      * anything else, which is forwarded unchanged as an
        already-flattened action index.

    NOTE(review): the guard below checks self.done rather than whether
    `state` itself is finished, and the transition uses self.group_map
    rather than a group map derived from `state` -- confirm both are
    intended for callers passing a state other than self.state.

    return observation (a copy of the next state), reward, done, info
    '''
    assert not self.done

    if isinstance(action, (tuple, list, np.ndarray)):
        # Flatten the 2-D board coordinate in row-major order.
        row = action[0]
        assert 0 <= row < self.size
        col = action[1]
        assert 0 <= col < self.size
        action = self.size * row + col
    elif action is None:
        action = self.size**2

    # GoGame works on batches, so wrap the single action and unwrap
    # the single result afterwards.
    batch_states, batch_group_maps = GoGame.get_batch_next_states(
        state, np.array([action]), self.group_map)
    successor = batch_states[0]
    # Looked up as in the original code but not used further here.
    successor_group_map = batch_group_maps[0]
    finished = GoGame.get_game_ended(successor)

    observation = np.copy(successor)
    reward = self.get_reward_batch(successor, finished)
    info = self.get_info_batch(successor)
    return observation, reward, finished, info