Example 1
 def __init__(self, size, reward_method='real', black_first=True):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     self.state = GoGame.get_init_board(size, black_first)
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(0, 6, shape=(6, size, size))
     self.action_space = gym.spaces.Discrete(
         GoGame.get_action_size(self.state))
     self.group_map = np.empty(self.state.shape[1:], dtype=object)  # one slot per board point for stone-group lookups
     self.clear_cache()
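A minimal usage sketch for this variant, assuming the method belongs to a gym environment class named GoEnv (the class name and its imports are not shown in the snippet); the black_first flag and the (6, size, size) observation shape are taken directly from the constructor above.

 # Hypothetical direct instantiation; GoEnv is an assumed name for the class that owns this __init__.
 env = GoEnv(size=9, reward_method='real', black_first=True)
 print(env.observation_space.shape)  # (6, 9, 9), per the Box defined above
 print(env.action_space.n)           # GoGame.get_action_size(state), typically all board points plus a pass move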
Example 2
 def __init__(self, size, komi=0, reward_method='real'):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     self.komi = komi
     self.state = GoGame.get_init_board(size)
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(np.float32(0), np.float32(govars.NUM_CHNLS),
                                             shape=(govars.NUM_CHNLS, size, size))
     self.action_space = gym.spaces.Discrete(GoGame.get_action_size(self.state))
     self.group_map = [set(), set()]  # stone-group bookkeeping, one set per player
     self.done = False  # game-over flag
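This variant matches the published gym-go interface most closely, where the environment is normally created through gym.make rather than by calling the constructor directly. A sketch, assuming the package registers the environment under the 'gym_go:go-v0' id (the registration id is not part of the snippet):

 import gym

 # Assumed registration id; adjust to whatever the installed package actually registers.
 go_env = gym.make('gym_go:go-v0', size=7, komi=0, reward_method='real')
 print(go_env.observation_space.shape)  # (govars.NUM_CHNLS, 7, 7), per the Box above
 print(go_env.action_space.n)           # board points plus pass, per GoGame.get_action_size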
Example 3
 def __init__(self,
              size,
              reward_method='real',
              black_first=True,
              state=None):
     '''
     @param reward_method: either 'heuristic' or 'real'
     heuristic: gives # black pieces - # white pieces.
     real: gives 0 for in-game move, 1 for winning, -1 for losing,
         0 for draw, all from black player's perspective
     '''
     self.size = size
     if state is None:
         self.state = GoGame.get_init_board(size, black_first)
     else:
         assert state.shape[1] == size
         self.state = np.copy(state)  # copy so the caller's array is not mutated
     self.reward_method = RewardMethod(reward_method)
     self.observation_space = gym.spaces.Box(0, 6, shape=(6, size, size))
     self.action_space = gym.spaces.Discrete(
         GoGame.get_action_size(self.state))
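The extra state parameter is what distinguishes this variant: it lets the environment resume from an existing board tensor instead of the initial position, and the assert state.shape[1] == size only checks that the board dimension matches. A sketch, again assuming a GoEnv class name:

 import numpy as np

 # Hypothetical round trip: snapshot one environment's state and seed another from it.
 first = GoEnv(size=5, reward_method='real')
 snapshot = np.copy(first.state)           # (channels, 5, 5) board tensor
 resumed = GoEnv(size=5, state=snapshot)   # satisfies the assert state.shape[1] == size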