Esempio n. 1
0
    def _playout(self, game_state: go.GameState):
        """Perform one playout: descend from the root to a leaf, evaluate the
        leaf, and back the value up through the visited nodes.

        ``game_state`` is advanced in place with every move selected on the
        way down, so the caller must pass a copy it can discard.
        """
        current = self._root
        # Selection: keep following select() until a node without children.
        while not current.is_leaf():
            action, current = current.select(self._c_puct)
            # Any action other than PASS_MOVE is decoded as
            # (action // size, action % size) before being played.
            move = (PASS_MOVE if action == PASS_MOVE
                    else divmod(action, game_state.size))
            game_state.do_move(move)

        game_over = (game_state.is_end_of_game
                     or len(game_state.get_legal_moves(False)) == 0)
        if game_over:
            # Terminal position: use the actual result as the leaf value.
            winner = game_state.get_winner()
            if winner is None:  # tie
                leaf_value = 0.0
            elif winner == game_state.current_player:
                leaf_value = 1.0
            else:
                leaf_value = -1.0
        else:
            # Non-terminal leaf: ask the network for priors and a value,
            # then grow the tree by one level.
            action_probs, leaf_value = self._policy_value_net.policy_value_fn(
                game_state
            )
            current.expand(action_probs)

        # Back up the negated leaf value along the path that was traversed.
        current.update_recursive(-leaf_value)
Esempio n. 2
0
    def _evaluate_rollout(self, game_state: go.GameState, limit=1000):
        """Play the game out with the rollout policy and score the result.

        At most ``limit`` moves are played on ``game_state`` (which is
        modified in place). Returns 1.0 if the player to move on entry is
        reported as the winner, -1.0 if a different winner is reported, and
        0.0 when ``get_winner()`` returns None (tie).
        """
        player = game_state.current_player
        for _ in range(limit):
            finished = (game_state.is_end_of_game
                        or len(game_state.get_legal_moves(False)) == 0)
            if finished:
                break

            # Greedy rollout step: take the action with the highest score.
            best_action, _score = max(_rollout_policy_fn(game_state),
                                      key=itemgetter(1))
            # Non-pass actions are decoded as (action // size, action % size).
            move = (PASS_MOVE if best_action == PASS_MOVE
                    else divmod(best_action, game_state.size))
            game_state.do_move(move)
        else:
            # Only reached when the loop ran out without hitting ``break``.
            print("WARNING: rollout reached move limit")

        winner = game_state.get_winner()
        if winner is None:  # tie
            return 0.0
        if winner == player:
            return 1.0
        return -1.0
Esempio n. 3
0
    def test_history_records_after_playing_a_move(self):
        """After one move the history holds a single entry: the empty board."""
        state = go.play_move(
            GameState.get_new_game_state(SMALL_BOARD), (0, 0), BLACK)

        recorded = state.history
        self.assertEqual(len(recorded), 1)
        np.testing.assert_array_equal(recorded[0], EMPTY_BOARD)
Esempio n. 4
0
    def test_move_is_incremented(self):
        """``moves`` reads 0 on a new game and 1 after the first move."""
        before = GameState.get_new_game_state(SMALL_BOARD)
        self.assertEqual(before.moves, 0)

        after = go.play_move(before, (0, 0), BLACK)
        self.assertEqual(after.moves, 1)
Esempio n. 5
0
 def test_stone_can_be_captured_side(self):
     """A black stone at (0, 4) is gone after white plays (0, 5), (0, 3), (1, 4)."""
     state = go.play_move(
         GameState.get_new_game_state(SMALL_BOARD), (0, 4), BLACK)
     for point in ((0, 5), (0, 3), (1, 4)):
         state = go.play_move(state, point, WHITE)
     self.assertEqual(state.board[0][4], EMPTY)
Esempio n. 6
0
 def test_cannot_do_a_suicide_move(self):
     """White playing (4, 4) between four black stones must raise
     IllegalMoveException."""
     state = GameState.get_new_game_state(SMALL_BOARD)
     for point in ((5, 4), (4, 5), (4, 3), (3, 4)):
         state = go.play_move(state, point, BLACK)
     self.assertRaises(
         IllegalMoveException, go.play_move, state, (4, 4), WHITE)
Esempio n. 7
0
def _rollout_policy_fn(game_state: go.GameState):
    """Coarse, fast stand-in for the policy function used during rollouts.

    Every legal move (plus PASS_MOVE) is paired with an independent uniform
    random score, so picking the maximum amounts to a random rollout.

    Returns a ``zip`` of ``(action, score)`` pairs, where a board move
    ``(a, b)`` is encoded as ``a * game_state.size + b``.
    """
    # reshape(-1, 2) keeps the (n, 2) layout when there are no legal board
    # moves; without it np.array([]) has shape (0,) and the matrix product
    # below raises ValueError instead of yielding only PASS_MOVE.
    # Assumes get_legal_moves() yields coordinate pairs — the encoding below
    # already relies on that.
    coords = np.array(game_state.get_legal_moves(False)).reshape(-1, 2)
    legal_moves = (coords @ np.array([game_state.size, 1])).tolist()
    legal_moves = legal_moves + [PASS_MOVE]
    action_probs = np.random.rand(len(legal_moves))
    return zip(legal_moves, action_probs)
Esempio n. 8
0
    def test_player_can_play_a_move(self):
        """Playing BLACK at (0, 0) changes exactly that cell of the empty board."""
        expected = np.copy(EMPTY_BOARD)
        expected[0, 0] = BLACK

        state = go.play_move(
            GameState.get_new_game_state(SMALL_BOARD), (0, 0), BLACK)

        np.testing.assert_array_equal(state.board, expected)
Esempio n. 9
0
 def test_cannot_capture_own_stone_in_corner(self):
     """Three black stones played along row 0 from the corner all stay on
     the board."""
     stones = ((0, 0), (0, 1), (0, 2))
     state = GameState.get_new_game_state(SMALL_BOARD)
     for point in stones:
         state = go.play_move(state, point, BLACK)
     for row, col in stones:
         self.assertEqual(state.board[row][col], BLACK)
Esempio n. 10
0
 def test_stone_can_be_captured_middle(self):
     """A black stone at (4, 4) is gone after white plays on all four
     adjacent points."""
     state = go.play_move(
         GameState.get_new_game_state(SMALL_BOARD), (4, 4), BLACK)
     for point in ((4, 5), (4, 3), (5, 4), (3, 4)):
         state = go.play_move(state, point, WHITE)
     self.assertEqual(state.board[4][4], EMPTY)
Esempio n. 11
0
def _policy_value_fn(game_state: go.GameState):
    """Uniform policy and zero value, as used by pure MCTS.

    Returns a tuple ``(action_probs, value)``: ``action_probs`` is a ``zip``
    of ``(action, probability)`` pairs giving every legal move plus
    PASS_MOVE the same probability, and ``value`` is always 0. A board move
    ``(a, b)`` is encoded as ``a * game_state.size + b``.
    """
    # reshape(-1, 2) keeps the (n, 2) layout when there are no legal board
    # moves; without it np.array([]) has shape (0,) and the matrix product
    # below raises ValueError instead of yielding only PASS_MOVE.
    # Assumes get_legal_moves() yields coordinate pairs — the encoding below
    # already relies on that.
    coords = np.array(game_state.get_legal_moves(False)).reshape(-1, 2)
    legal_moves = (coords @ np.array([game_state.size, 1])).tolist()
    legal_moves = legal_moves + [PASS_MOVE]
    action_probs = np.ones(len(legal_moves)) / len(legal_moves)
    return zip(legal_moves, action_probs), 0
Esempio n. 12
0
 def test_cannot_do_a_suicide_move_in_the_corner(self):
     """After the set-up sequence below, white playing (1, 0) must raise
     IllegalMoveException."""
     setup = (
         ((0, 2), BLACK),
         ((0, 0), WHITE),
         ((1, 1), BLACK),
         ((0, 1), WHITE),
         ((2, 0), BLACK),
     )
     state = GameState.get_new_game_state(SMALL_BOARD)
     for point, color in setup:
         state = go.play_move(state, point, color)
     self.assertRaises(
         IllegalMoveException, go.play_move, state, (1, 0), WHITE)
Esempio n. 13
0
def main():
    """Play 200 random games on SMALL_BOARD, each capped at 200 moves, and
    score every finished game with ``go.get_winner``."""
    for _game_index in range(200):
        state = GameState.get_new_game_state(SMALL_BOARD)
        for _move_index in range(200):
            candidates = go.get_legal_moves(state)
            if not candidates:
                # Nothing left to play: stop this game early.
                break
            state = go.play_move(state, random.choice(candidates))
        # The result is computed but not used any further here.
        winner_color, points_difference = go.get_winner(state)
Esempio n. 14
0
 def test_cannot_capture_own_stone_on_side(self):
     """Black stones placed around and on (0, 4) all remain on the board."""
     stones = ((0, 4), (1, 4), (0, 5), (0, 3))
     state = GameState.get_new_game_state(SMALL_BOARD)
     for point in stones:
         state = go.play_move(state, point, BLACK)
     for row, col in stones:
         self.assertEqual(state.board[row][col], BLACK)
Esempio n. 15
0
 def test_group_can_be_captured_corner(self):
     """A three-stone black group in the corner is emptied once white plays
     (0, 2), (2, 0) and (1, 1)."""
     black_group = ((0, 0), (0, 1), (1, 0))
     white_moves = ((0, 2), (2, 0), (1, 1))

     state = GameState.get_new_game_state(SMALL_BOARD)
     for point in black_group:
         state = go.play_move(state, point, BLACK)
     for point in white_moves:
         state = go.play_move(state, point, WHITE)

     for row, col in black_group:
         self.assertEqual(state.board[row][col], EMPTY)
Esempio n. 16
0
    def test_history_records_after_playing_two_moves(self):
        """After two moves the history holds the empty board followed by the
        board as it stood after the first move."""
        expected_after_first = np.zeros(SMALL_BOARD)
        expected_after_first[0][0] = BLACK

        state = GameState.get_new_game_state(SMALL_BOARD)
        for point in ((0, 0), (0, 1)):
            # No colour is passed; play_move picks the stone itself.
            state = go.play_move(state, point)

        self.assertEqual(len(state.history), 2)
        np.testing.assert_array_equal(state.history[0], EMPTY_BOARD)
        np.testing.assert_array_equal(state.history[1], expected_after_first)
Esempio n. 17
0
 def test_cannot_capture_own_stone_center(self):
     """A black stone at (4, 4) and the four black stones played next to it
     all remain on the board."""
     stones = ((4, 4), (4, 5), (4, 3), (3, 4), (5, 4))
     state = GameState.get_new_game_state(SMALL_BOARD)
     for point in stones:
         state = go.play_move(state, point, BLACK)
     for row, col in stones:
         self.assertEqual(state.board[row][col], BLACK)
Esempio n. 18
0
class play_match(object):
    """Drive a game between two players on a single shared GameState.

    Each player object must provide ``get_move(state)``; after every move
    the state is written to disk.
    """
    def __init__(self, player1, player2, save_dir, size=19):
        """Store both players and create the GameState for the match.

        ``save_dir`` and ``size`` are forwarded to ``GameState``.
        """
        self.player1, self.player2 = player1, player2
        # NOTE: proposal from the original author - GameState should take a
        # top-level save directory and generate the specific file name itself.
        self.state = GameState(save_dir, size=size)

    def _play(self, player):
        """Ask ``player`` for a move, apply it, save the state to disk and
        return whatever ``do_move`` returned (the end-of-game flag)."""
        finished = self.state.do_move(player.get_move(self.state))
        self.state.write_to_disk()
        return finished

    def play(self):
        """Let player1 move and, unless that ended the game, player2 as well.

        Returns the end-of-game flag of the last move that was played.
        """
        return self._play(self.player1) or self._play(self.player2)
Esempio n. 19
0
    def get_move(self, game_state: go.GameState):
        """Run ``self._n_playout`` playouts one after another and pick the
        action whose child of the root was visited most often.

        game_state: the current game state; every playout works on its own
        copy, so the argument itself is not advanced.

        Return: the selected action, or PASS_MOVE when the root is still a
        leaf after the playouts.
        """
        for _ in range(self._n_playout):
            self._playout(game_state.copy())

        root = self._root
        if root.is_leaf():
            return PASS_MOVE

        best_action, _best_child = max(
            root._children.items(), key=lambda item: item[1]._n_visits)
        return best_action
Esempio n. 20
0
class play_match(object):
    """Drive a game between two players on a single shared GameState.

    Each player object must provide ``get_move(state)``; after every move
    the state is written to disk.
    """

    def __init__(self, player1, player2, save_dir, size=19):
        """Store both players and create the GameState for the match.

        ``save_dir`` and ``size`` are forwarded to ``GameState``.
        """
        self.player1, self.player2 = player1, player2
        # NOTE: proposal from the original author - GameState should take a
        # top-level save directory and generate the specific file name itself.
        self.state = GameState(save_dir, size=size)

    def _play(self, player):
        """Ask ``player`` for a move, apply it, save the state to disk and
        return whatever ``do_move`` returned (the end-of-game flag)."""
        finished = self.state.do_move(player.get_move(self.state))
        self.state.write_to_disk()
        return finished

    def play(self):
        """Let player1 move and, unless that ended the game, player2 as well.

        Returns the end-of-game flag of the last move that was played.
        """
        return self._play(self.player1) or self._play(self.player2)
Esempio n. 21
0
 def test_count_score(self):
     """Black fills row 1 and white fills row 2 (columns 0-8); the expected
     result from ``go.get_winner`` is (WHITE, 50.5)."""
     state = GameState.get_new_game_state(SMALL_BOARD)
     for row, color in ((1, BLACK), (2, WHITE)):
         for col in range(9):
             state = go.play_move(state, (row, col), color)
     self.assertEqual(go.get_winner(state), (WHITE, 50.5))
Esempio n. 22
0
    def get_move_probs(self, game_state: go.GameState):
        """Run ``self._n_playout`` playouts one after another and turn the
        visit counts of the root's children into move probabilities.

        game_state: the current game state; every playout works on its own
        copy.

        The probabilities are ``softmax(log(visits + 1e-10) / T)`` with
        ``T = self._temperature``.

        Return: ``(acts, act_probs)``, or ``(None, None)`` when the root is
        still a leaf after the playouts.
        """
        for _ in range(self._n_playout):
            self._playout(game_state.copy())

        if self._root.is_leaf():
            return None, None

        # Split the (action, visit count) pairs of the root's children into
        # two parallel tuples.
        acts, visits = zip(*(
            (act, child._n_visits)
            for act, child in self._root._children.items()
        ))
        # The small epsilon keeps log() finite for unvisited children.
        log_visits = np.log(np.array(visits) + 1e-10)
        act_probs = softmax(1.0 / self._temperature * log_visits)

        return acts, act_probs
Esempio n. 23
0
 def test_count_score_with_dames(self):
     """Black fills row 3; white plays (4, 0)-(4, 3) and (5, 3)-(5, 8); the
     expected result from ``go.get_winner`` is (WHITE, 9.5)."""
     black_moves = [(3, col) for col in range(9)]
     white_moves = ([(4, col) for col in range(4)]
                    + [(5, col) for col in range(3, 9)])

     state = GameState.get_new_game_state(SMALL_BOARD)
     for point in black_moves:
         state = go.play_move(state, point, BLACK)
     for point in white_moves:
         state = go.play_move(state, point, WHITE)

     self.assertEqual(go.get_winner(state), (WHITE, 9.5))
Esempio n. 24
0
    def policy_value_fn(self, game_state: go.GameState):
        """Evaluate ``game_state`` with the policy-value network.

        input: the game state to evaluate
        output: a ``zip`` of (action, probability) pairs for every legal
        move plus PASS_MOVE, and the network's value for the state

        A board move ``(a, b)`` is encoded as ``a * size + b``; the
        probability for PASS_MOVE is read from network output index
        ``size ** 2``.
        """
        # reshape(-1, 2) keeps the (n, 2) layout when there are no legal
        # board moves; without it np.array([]) has shape (0,) and the matrix
        # product below raises ValueError instead of yielding only the pass.
        # Assumes get_legal_moves() yields coordinate pairs — the encoding
        # below already relies on that.
        coords = np.array(game_state.get_legal_moves(False)).reshape(-1, 2)
        legal_moves = (coords @ np.array([game_state.size, 1])).tolist()
        legal_moves_idx = legal_moves + [game_state.size**2]
        legal_moves = legal_moves + [PASS_MOVE]

        # Add a leading batch axis of length 1 for the network.
        current_input = np.expand_dims(get_current_input(game_state), axis=0)
        net_input = Variable(torch.from_numpy(current_input))
        if self.use_gpu:
            net_input = net_input.cuda()
        log_act_probs, value = self.policy_value_net(net_input.float())
        # The first output is treated as log-probabilities; .cpu() is a
        # no-op when the tensor already lives on the CPU.
        act_probs = np.exp(log_act_probs.data.cpu().numpy().flatten())

        _act_probs = zip(legal_moves, act_probs[legal_moves_idx])
        value = value.data[0][0]
        return _act_probs, value
Esempio n. 25
0
 def __init__(self, player1, player2, save_dir, size=19):
     """Store both players and create the GameState for the match.

     ``save_dir`` and ``size`` are forwarded to ``GameState``.
     """
     self.player1, self.player2 = player1, player2
     self.state = GameState(save_dir, size=size)
Esempio n. 26
0
 def __init__(self, player1, player2, save_dir, size=19):
     """Store both players and create the GameState for the match.

     ``save_dir`` and ``size`` are forwarded to ``GameState``.
     """
     self.player1, self.player2 = player1, player2
     self.state = GameState(save_dir, size=size)
Esempio n. 27
0
 def test_move_cannot_be_placed_on_taken_place(self):
     """Playing a second stone on an occupied point must raise."""
     # Set-up stays outside the assertRaises block so that a failure while
     # creating the game or playing the first stone is reported as an error
     # instead of being mistaken for the expected exception.
     game = GameState.get_new_game_state(SMALL_BOARD)
     game = go.play_move(game, (0, 0))
     with self.assertRaises(Exception):
         go.play_move(game, (0, 0))
Esempio n. 28
0
 def test_new_game_starts_empty(self):
     """The board of a new game equals ``np.zeros(SMALL_BOARD)``."""
     expected = np.zeros(SMALL_BOARD)
     fresh = GameState.get_new_game_state(SMALL_BOARD)
     np.testing.assert_array_equal(fresh.board, expected)
Esempio n. 29
0
 def __init__(self, state):
     """Create a new GameState and a new TreeNode for this instance."""
     # NOTE(review): the ``state`` argument is never read; a brand-new
     # GameState() is stored instead. Confirm whether ``self.state = state``
     # was intended.
     self.state = GameState()
     self.treenode = TreeNode()
Esempio n. 30
0
 def test_new_game_current_player_black(self):
     """BLACK is the current player of a new game."""
     fresh = GameState.get_new_game_state(SMALL_BOARD)
     self.assertEqual(fresh.current_player, BLACK)
Esempio n. 31
0
 def test_current_player_alternates(self):
     """The current player is BLACK at the start and WHITE after one move."""
     before = GameState.get_new_game_state(SMALL_BOARD)
     self.assertEqual(before.current_player, BLACK)

     after = go.play_move(before, (0, 0))
     self.assertEqual(after.current_player, WHITE)
Esempio n. 32
0
 def test_proper_stone_is_played_if_not_indicated(self):
     """With no colour passed to play_move, the first stone is BLACK and
     the second is WHITE."""
     state = GameState.get_new_game_state(SMALL_BOARD)
     for (row, col), expected in (((0, 0), BLACK), ((0, 1), WHITE)):
         state = go.play_move(state, (row, col))
         self.assertEqual(state.board[row][col], expected)