def __init__(self, size, reward_method='real', black_first=True):
    """Set up a fresh Go environment of the given board size.

    @param size: side length of the square board
    @param reward_method: either 'heuristic' or 'real'
        heuristic: gives # black pieces - # white pieces.
        real: gives 0 for in-game move, 1 for winning, -1 for losing,
        0 for draw, all from black player's perspective
    @param black_first: whether black plays the opening move
    """
    self.size = size
    self.state = GoGame.get_init_board(size, black_first)
    self.reward_method = RewardMethod(reward_method)
    # NOTE(review): channel count 6 is hard-coded — presumably the number
    # of state planes in this version's representation; verify against GoGame.
    self.observation_space = gym.spaces.Box(0, 6, shape=(6, size, size))
    self.action_space = gym.spaces.Discrete(
        GoGame.get_action_size(self.state))
    # One object cell per board intersection, used for group bookkeeping.
    self.group_map = np.empty(self.state.shape[1:], dtype=object)
    self.clear_cache()
def __init__(self, size, komi=0, reward_method='real'):
    """Set up a fresh Go environment of the given board size.

    @param size: side length of the square board
    @param komi: compensation points granted to white
    @param reward_method: either 'heuristic' or 'real'
        heuristic: gives # black pieces - # white pieces.
        real: gives 0 for in-game move, 1 for winning, -1 for losing,
        0 for draw, all from black player's perspective
    """
    self.size = size
    self.komi = komi
    self.state = GoGame.get_init_board(size)
    self.reward_method = RewardMethod(reward_method)
    # Observation is a stack of NUM_CHNLS planes over the board; bounds are
    # given as float32 to match the dtype gym.spaces.Box expects.
    low = np.float32(0)
    high = np.float32(govars.NUM_CHNLS)
    self.observation_space = gym.spaces.Box(
        low, high, shape=(govars.NUM_CHNLS, size, size))
    self.action_space = gym.spaces.Discrete(
        GoGame.get_action_size(self.state))
    # Two group sets, one per player (black, white).
    self.group_map = [set(), set()]
    self.done = False
def __init__(self, size, reward_method='real', black_first=True, state=None):
    """Set up a Go environment, optionally resuming from a given state.

    @param size: side length of the square board
    @param reward_method: either 'heuristic' or 'real'
        heuristic: gives # black pieces - # white pieces.
        real: gives 0 for in-game move, 1 for winning, -1 for losing,
        0 for draw, all from black player's perspective
    @param black_first: whether black plays the opening move (ignored when
        an explicit state is supplied)
    @param state: optional existing game state to copy and continue from
    """
    self.size = size
    if state is not None:
        # NOTE(review): validation via `assert` is stripped under -O;
        # confirm callers never pass untrusted shapes.
        assert state.shape[1] == size
        self.state = np.copy(state)
    else:
        self.state = GoGame.get_init_board(size, black_first)
    self.reward_method = RewardMethod(reward_method)
    # NOTE(review): channel count 6 is hard-coded — presumably the number
    # of state planes in this version's representation; verify against GoGame.
    self.observation_space = gym.spaces.Box(0, 6, shape=(6, size, size))
    self.action_space = gym.spaces.Discrete(
        GoGame.get_action_size(self.state))