Beispiel #1
0
 def __init__(self, flatten_board_state=True, discount_factor=1.0):
     """Create a two-player Go environment starting from a fresh position.

     The board side length is taken from the ``BOARD_SIZE`` environment
     variable. It must be set to an integer string: ``int()`` raises
     ``TypeError`` when the variable is missing and ``ValueError`` when
     its value is not a valid integer.

     :param flatten_board_state: when true, ``reset`` reshapes the
         observation to a 1-D vector of ``BOARD_SIZE ** 2`` entries.
     :param discount_factor: value repeated once per player in the
         ``discounts`` of the time step built by ``reset``.
     """
     self.__state = go.Position(komi=0.5)
     self.__flatten_state = flatten_board_state
     self.__discount_factor = discount_factor
     self.__num_players = 2

     board_size = int(os.getenv("BOARD_SIZE"))
     self.__state_size = board_size * board_size
     # One action per board point, plus one extra action for "pass".
     self.__action_size = self.__state_size + 1
Beispiel #2
0
 def reset(self):
     """
     Start a new game and build its first time step.

     The current position is replaced by a fresh ``go.Position(komi=0.5)``.
     The observation is ``self.info_state``, reshaped to a 1-D vector of
     length ``state_size`` when the environment was created with
     ``flatten_board_state=True``.

     :return: a ``TimeStep`` with ``step_type=StepType.FIRST``, rewards
         ``[0.0, 0.0]`` and one discount entry per player. Its observations
         hold ``info_state`` and ``legal_actions`` as two-element lists
         (second entry ``None``) plus ``current_player``.
     """
     self.__state = go.Position(komi=0.5)

     board_view = self.info_state
     if self.__flatten_state:
         board_view = np.reshape(board_view, (self.__state_size,))

     # Indices of the entries that all_legal_moves() marks with 1.
     legal_mask = self.__state.all_legal_moves()
     legal_indices = np.where(legal_mask == 1)[0]

     observations = {
         "info_state": [board_view, None],
         "legal_actions": [legal_indices, None],
         "current_player": self.to_play,
     }
     discounts = [self.__discount_factor] * self.__num_players
     return TimeStep(
         observations=observations,
         rewards=[0.0, 0.0],
         discounts=discounts,
         step_type=StepType.FIRST,
     )