def __init__(self, flatten_board_state=True, discount_factor=1.0):
    """Create a two-player Go environment starting from a fresh position.

    The board side length is read from the ``BOARD_SIZE`` environment
    variable; the state size is its square and the action space has one
    extra entry for "pass".

    Args:
        flatten_board_state: When true, ``reset`` reshapes the info state
            into a 1-D array of length ``BOARD_SIZE ** 2``.
        discount_factor: Value repeated once per player in the
            ``discounts`` field of the time step built by ``reset``.

    Raises:
        TypeError: If the ``BOARD_SIZE`` environment variable is not set.
        ValueError: If ``BOARD_SIZE`` cannot be parsed as an integer.
    """
    self.__state = go.Position(komi=0.5)
    self.__flatten_state = flatten_board_state
    self.__discount_factor = discount_factor

    raw_board_size = os.environ.get("BOARD_SIZE")
    if raw_board_size is None:
        # int(None) would raise an opaque TypeError; keep the exception
        # type but tell the user which setting is actually missing.
        raise TypeError(
            "The BOARD_SIZE environment variable must be set to the "
            "board side length (e.g. BOARD_SIZE=9)"
        )
    board_size = int(raw_board_size)

    self.__state_size = board_size**2
    # board size and an extra action for "pass"
    self.__action_size = self.__state_size + 1
    self.__num_players = 2
def reset(self):
    """Start a new game and return its first time step.

    Replaces the current position with a fresh ``go.Position`` (komi 0.5)
    and packages the initial observation.

    :return: a ``TimeStep`` of type ``StepType.FIRST`` with zero rewards,
        one discount entry per player, and observations holding the info
        state (flattened if requested at construction), the indices where
        ``all_legal_moves()`` equals 1, and the current player.
    """
    self.__state = go.Position(komi=0.5)

    # Only the first slot of each per-player list is filled in; the second
    # slot is left as None, exactly as in the observation layout below.
    board_view = (
        np.reshape(self.info_state, (self.__state_size,))
        if self.__flatten_state
        else self.info_state
    )
    legal_mask = self.__state.all_legal_moves() == 1
    legal_indices = np.where(legal_mask)[0]

    first_observation = {
        "info_state": [board_view, None],
        "legal_actions": [legal_indices, None],
        "current_player": self.to_play,
    }
    per_player_discounts = [self.__discount_factor] * self.__num_players

    return TimeStep(
        observations=first_observation,
        rewards=[0.0, 0.0],
        discounts=per_player_discounts,
        step_type=StepType.FIRST,
    )