Esempio n. 1
0
 def test_spot_to_action(self):
     """Check that board spots convert to the expected action indices."""
     # ((row, col), expected action). The expected values are consistent
     # with row-major numbering on an 8-wide board (row * 8 + col).
     expectations = (
         ((0, 0), 0),
         ((1, 0), 8),
         ((1, 2), 10),
     )
     for (row, col), expected_action in expectations:
         spot = GameBoard.Spot(row, col)
         self.assertEqual(GameWrapper.convert_spot_to_action(spot),
                          expected_action)
Esempio n. 2
0
    def test_action_to_spot(self):
        """Check action-to-spot conversion for valid and invalid actions."""
        # (action, (row, col)) pairs that must round-trip to a concrete spot.
        valid_cases = (
            (0, (0, 0)),
            (8, (1, 0)),
            (10, (1, 2)),
        )
        for action, (row, col) in valid_cases:
            self.assertEqual(GameWrapper.convert_action_to_spot(action),
                             GameBoard.Spot(row, col))

        # One action past the upper end and one below zero must be rejected.
        for rejected_action in (64, -1):
            with self.assertRaises(Exception):
                GameWrapper.convert_action_to_spot(rejected_action)
 def reset(self) -> np.ndarray:
     """Start a fresh game and return player 1's view of the board.

     Replaces the board with a new ``GameBoard.GameBoard`` of size
     ``self.board_size``, hands the turn to ``GameBoard.PLAYER_1``, and
     clears the ``game_ended`` and ``is_log_play`` flags.

     Returns:
         The result of ``self.observe(GameBoard.PLAYER_1)``. The original
         comment describes it as a flat float32 array of shape (8*8,).
     """
     first_player = GameBoard.PLAYER_1
     self.game_board = GameBoard.GameBoard(self.board_size)
     self.current_player = first_player
     self.game_ended = self.is_log_play = False
     initial_observation = self.observe(first_player)
     return initial_observation
    def convert_action_to_spot(self, action: int):
        """Translate a flat action index into a board spot.

        The index is decoded row by row:
        ``row = action // board_size`` and ``col = action % board_size``.

        Args:
            action: Index of the cell to play, in the half-open range
                ``[0, board_size * board_size)``.

        Returns:
            A ``GameBoard.Spot`` built from the decoded row, column and
            ``self.board_size``.

        Raises:
            ValueError: If ``action`` falls outside the board. Without this
                check a negative index such as ``-1`` would be truncated
                toward zero and silently land on a real cell in row 0.
        """
        num_cells = self.board_size * self.board_size
        if not 0 <= action < num_cells:
            raise ValueError(
                'Action needs to be between [0, {0}), received {1}'.format(
                    num_cells, action))

        # divmod yields the floor quotient and remainder in a single step.
        row, col = divmod(action, self.board_size)
        return GameBoard.Spot(int(row), int(col), self.board_size)
 def reset(self, game_reset_random=False) -> np.ndarray:
     """Start a fresh game, optionally from a randomized opening.

     Args:
         game_reset_random: When truthy, a second draw from
             ``[True, False, False]`` decides whether the new board is
             actually created with ``random_start`` enabled. When falsy,
             the value is forwarded to ``random_start`` unchanged and no
             random draw takes place.

     Returns:
         The result of ``self.observe(GameBoard.PLAYER_1)``. The original
         comment describes it as a flat float32 array of shape (8*8,).
     """
     use_random_start = game_reset_random
     if use_random_start:
         # Requested random games are themselves randomized.
         use_random_start = random.choice([True, False, False])

     first_player = GameBoard.PLAYER_1
     self.game_board = GameBoard.GameBoard(self.board_size,
                                           random_start=use_random_start)
     self.current_player = first_player
     self.game_ended = self.is_log_play = False
     return self.observe(first_player)
    def execute_move(self, spot):
        """Play ``spot`` for the current player and advance the turn.

        Args:
            spot: Board position to play; it is wrapped in a
                ``GameBoard.GameMove`` before being handed to the board.

        Returns:
            This wrapper, now holding the board produced by the move, the
            next player to act, and the board's ``game_ended`` flag.

        Raises:
            ValueError: If the board reports the move as invalid. The
                wrapper's state is left untouched in that case.
        """
        mover = self.current_player
        outcome = self.game_board.make_a_move(mover, GameBoard.GameMove(spot))

        if outcome.is_move_valid:
            updated_board = outcome.new_game_board
            self.game_board = updated_board
            self.current_player = updated_board.get_next_player(mover)
            self.game_ended = updated_board.game_ended
            return self

        raise ValueError('Invalid move.')
    def execute_move(
            self,
            action: int) -> Tuple[np.ndarray, float, int, GameWrapper, bool]:
        """Apply ``action`` for the current player and advance the game.

        Args:
            action: Values below ``self.PASS_TURN_ACTION`` are decoded with
                ``convert_action_to_spot`` and played as a piece placement.
                Any other value passes the turn to the opponent.

        Returns:
            A tuple ``(observation, reward, game_ended, self, is_move_valid)``:

            * observation: ``self.observe(...)`` for whoever is the current
              player once this call has finished, i.e. the next player
              after a valid placement or a pass.
            * reward:
                - placement that ends the game: ``10 * margin + 10`` when
                  the mover is ahead, otherwise ``10 * margin - 10`` (a tie
                  therefore scores -10), where ``margin`` is the mover's
                  piece count minus the opponent's;
                - valid placement, game continues: ``1.5``;
                - invalid placement, game continues: ``-0.1``;
                - pass: ``-2`` times the number of valid spots the passing
                  player still had.
            * game_ended: the board's ``game_ended`` flag.
            * self: this wrapper, which is updated in place.
            * is_move_valid: validity reported by the board for a
              placement; always ``True`` for a pass.

        Notes:
            After an invalid placement the turn is not handed over. A pass
            switches the player but leaves ``self.game_ended`` as it was.
        """
        placing_piece = action < self.PASS_TURN_ACTION

        if not placing_piece:
            # Pass: nothing is placed, the turn simply moves on.
            spot = None
            is_move_valid = True
            passing_player = self.current_player
            board = self.game_board
            # Passing costs more the more moves were actually available.
            reward = -2 * len(board.get_valid_spots(passing_player))
            game_ended = board.game_ended
            self.current_player = board.get_next_player(passing_player)
            observation = self.observe(self.current_player)
        else:
            # Placement: let the board attempt the move and adopt its result.
            spot = self.convert_action_to_spot(action)
            mover = self.current_player
            outcome = self.game_board.make_a_move(mover,
                                                  GameBoard.GameMove(spot))
            is_move_valid = outcome.is_move_valid
            board = outcome.new_game_board
            self.game_board = board
            game_ended = board.game_ended

            if not game_ended:
                reward = 1.5 if is_move_valid else -0.1
            else:
                # Final score from the mover's point of view, scaled by 10
                # with an extra 10 added for a win and deducted otherwise.
                p1_pieces = board.player_1_count
                p2_pieces = board.player_2_count
                if mover == GameBoard.PLAYER_1:
                    margin = p1_pieces - p2_pieces
                else:
                    margin = p2_pieces - p1_pieces
                reward = 10 * margin + (10 if margin > 0 else -10)

            # Only a valid placement hands the turn to the other side.
            if is_move_valid:
                self.current_player = board.get_next_player(mover)
            self.game_ended = board.game_ended
            observation = self.observe(self.current_player)

        if self.is_log_play:
            self.log_move(spot, self)
        return (observation, reward, game_ended, self, is_move_valid)
 def __init__(self, id: int):
     """Create a wrapper around a new default game board.

     Args:
         id: Identifier stored on the instance as ``self.id``.
     """
     self.id = id
     self.game_board = GameBoard.GameBoard()
     # A new game opens with player 1 to move and is not finished yet.
     self.current_player, self.game_ended = GameBoard.PLAYER_1, False