def _playout(self, game_state: go.GameState):
    """Walk from the root down to a leaf, evaluate it, and back the value up.

    ``game_state`` is advanced in place with every action selected on the
    way down, so the caller must hand in a copy it does not need afterwards.
    """
    current = self._root
    while not current.is_leaf():
        action, current = current.select(self._c_puct)
        if action == PASS_MOVE:
            game_state.do_move(PASS_MOVE)
        else:
            # Flat action index -> (row, col) board coordinate.
            game_state.do_move(divmod(action, game_state.size))

    terminal = (game_state.is_end_of_game
                or len(game_state.get_legal_moves(False)) == 0)
    if terminal:
        # For an end state, use the "true" outcome as the leaf value.
        winner = game_state.get_winner()
        if winner is None:  # tie
            leaf_value = 0.0
        elif winner == game_state.current_player:
            leaf_value = 1.0
        else:
            leaf_value = -1.0
    else:
        action_probs, leaf_value = self._policy_value_net.policy_value_fn(
            game_state)
        current.expand(action_probs)

    # Update value and visit count of the nodes in this traversal; the value
    # is handed over negated.
    current.update_recursive(-leaf_value)
def _evaluate_rollout(self, game_state: go.GameState, limit=1000):
    """Play the position out with the rollout policy and score the result.

    Returns +1.0 if the player to move at the start of the rollout wins,
    -1.0 if the other side wins, and 0.0 when there is no winner.  At most
    ``limit`` moves are played; ``game_state`` is modified in place.
    """
    starting_player = game_state.current_player

    reached_limit = True
    for _ in range(limit):
        finished = (game_state.is_end_of_game
                    or len(game_state.get_legal_moves(False)) == 0)
        if finished:
            reached_limit = False
            break
        # Pick the action with the highest rollout score.
        best_action, _best_score = max(_rollout_policy_fn(game_state),
                                       key=itemgetter(1))
        if best_action == PASS_MOVE:
            game_state.do_move(PASS_MOVE)
        else:
            game_state.do_move((best_action // game_state.size,
                                best_action % game_state.size))
    if reached_limit:
        # The loop ran out without hitting an end state: issue a warning.
        print("WARNING: rollout reached move limit")

    winner = game_state.get_winner()
    if winner is None:  # tie
        return 0.0
    if winner == starting_player:
        return 1.0
    return -1.0
def test_history_records_after_playing_a_move(self):
    """After one move the history holds a single entry: the empty board."""
    fresh_game = GameState.get_new_game_state(SMALL_BOARD)
    played = go.play_move(fresh_game, (0, 0), BLACK)

    recorded = played.history
    self.assertEqual(len(recorded), 1)
    np.testing.assert_array_equal(recorded[0], EMPTY_BOARD)
def test_move_is_incremented(self):
    """The move counter is 0 for a new game and 1 after the first move."""
    fresh_game = GameState.get_new_game_state(SMALL_BOARD)
    self.assertEqual(fresh_game.moves, 0)

    after_first = go.play_move(fresh_game, (0, 0), BLACK)
    self.assertEqual(after_first.moves, 1)
def test_stone_can_be_captured_side(self):
    """A black stone at (0, 4) is removed once white plays on its three neighbours."""
    game = GameState.get_new_game_state(SMALL_BOARD)
    game = go.play_move(game, (0, 4), BLACK)

    for surrounding_point in ((0, 5), (0, 3), (1, 4)):
        game = go.play_move(game, surrounding_point, WHITE)

    self.assertEqual(game.board[0][4], EMPTY)
def test_cannot_do_a_suicide_move(self):
    """White playing at (4, 4) with black on all four neighbours is rejected."""
    game = GameState.get_new_game_state(SMALL_BOARD)
    for black_point in ((5, 4), (4, 5), (4, 3), (3, 4)):
        game = go.play_move(game, black_point, BLACK)

    with self.assertRaises(IllegalMoveException):
        go.play_move(game, (4, 4), WHITE)
def _rollout_policy_fn(game_state: go.GameState):
    """A coarse, fast version of policy_fn used in the rollout phase.

    Assigns an independent uniform random score to every legal move plus the
    pass move, i.e. the rollout is played randomly.

    Returns:
        An iterator of ``(action, score)`` pairs, where ``action`` is the
        flat index ``row * game_state.size + col`` of a legal move, or
        ``PASS_MOVE`` for the final entry.
    """
    # reshape(-1, 2) keeps an empty move list two-dimensional; without it the
    # matrix product below raises ValueError when no legal moves remain.
    coordinates = np.array(game_state.get_legal_moves(False)).reshape(-1, 2)
    legal_moves = (coordinates @ np.array([game_state.size, 1])).tolist()
    legal_moves.append(PASS_MOVE)
    action_probs = np.random.rand(len(legal_moves))
    return zip(legal_moves, action_probs)
def test_player_can_play_a_move(self):
    """Playing black at (0, 0) yields an empty board with one black stone there."""
    expected_board = np.copy(EMPTY_BOARD)
    expected_board[0][0] = BLACK

    fresh_game = GameState.get_new_game_state(SMALL_BOARD)
    played = go.play_move(fresh_game, (0, 0), BLACK)

    np.testing.assert_array_equal(played.board, expected_board)
def test_cannot_capture_own_stone_in_corner(self):
    """Three black stones along the top edge from the corner all stay on the board."""
    black_points = ((0, 0), (0, 1), (0, 2))

    game = GameState.get_new_game_state(SMALL_BOARD)
    for point in black_points:
        game = go.play_move(game, point, BLACK)

    for row, col in black_points:
        self.assertEqual(game.board[row][col], BLACK)
def test_stone_can_be_captured_middle(self):
    """A black stone at (4, 4) is removed once white plays on its four neighbours."""
    game = GameState.get_new_game_state(SMALL_BOARD)
    game = go.play_move(game, (4, 4), BLACK)

    for surrounding_point in ((4, 5), (4, 3), (5, 4), (3, 4)):
        game = go.play_move(game, surrounding_point, WHITE)

    self.assertEqual(game.board[4][4], EMPTY)
def _policy_value_fn(game_state: go.GameState):
    """Return uniform move probabilities and a zero score, for pure MCTS.

    Returns:
        A pair ``(action_probs, value)``.  ``action_probs`` is an iterator of
        ``(action, probability)`` tuples covering every legal move (as the
        flat index ``row * game_state.size + col``) followed by
        ``PASS_MOVE``, all with the same probability.  ``value`` is always 0.
    """
    # reshape(-1, 2) keeps an empty move list two-dimensional; without it the
    # matrix product below raises ValueError when no legal moves remain.
    coordinates = np.array(game_state.get_legal_moves(False)).reshape(-1, 2)
    legal_moves = (coordinates @ np.array([game_state.size, 1])).tolist()
    legal_moves.append(PASS_MOVE)
    action_probs = np.ones(len(legal_moves)) / len(legal_moves)
    return zip(legal_moves, action_probs), 0
def test_cannot_do_a_suicide_move_in_the_corner(self):
    """White playing (1, 0) after the corner setup below raises IllegalMoveException."""
    setup_moves = (
        ((0, 2), BLACK),
        ((0, 0), WHITE),
        ((1, 1), BLACK),
        ((0, 1), WHITE),
        ((2, 0), BLACK),
    )

    game = GameState.get_new_game_state(SMALL_BOARD)
    for point, colour in setup_moves:
        game = go.play_move(game, point, colour)

    with self.assertRaises(IllegalMoveException):
        go.play_move(game, (1, 0), WHITE)
def main():
    """Play 200 random games of at most 200 moves each and score every one.

    Each game starts from a new SMALL_BOARD state; a game stops early when
    no legal moves are left.  The scoring result is computed and discarded.
    """
    for _game_number in range(200):
        game = GameState.get_new_game_state(SMALL_BOARD)
        for _move_number in range(200):
            candidates = go.get_legal_moves(game)
            if not candidates:
                break
            game = go.play_move(game, random.choice(candidates))
        _winner_color, _points_difference = go.get_winner(game)
def test_cannot_capture_own_stone_on_side(self):
    """A black stone on the edge surrounded by black stones is not removed."""
    black_points = ((0, 4), (1, 4), (0, 5), (0, 3))

    game = GameState.get_new_game_state(SMALL_BOARD)
    for point in black_points:
        game = go.play_move(game, point, BLACK)

    for row, col in black_points:
        self.assertEqual(game.board[row][col], BLACK)
def test_group_can_be_captured_corner(self):
    """A three-stone black corner group is removed after white plays around it."""
    black_group = ((0, 0), (0, 1), (1, 0))
    white_surround = ((0, 2), (2, 0), (1, 1))

    game = GameState.get_new_game_state(SMALL_BOARD)
    for point in black_group:
        game = go.play_move(game, point, BLACK)
    for point in white_surround:
        game = go.play_move(game, point, WHITE)

    for row, col in black_group:
        self.assertEqual(game.board[row][col], EMPTY)
def test_history_records_after_playing_two_moves(self):
    """After two moves the history holds the empty board, then the board after move one."""
    after_first_move = np.zeros(SMALL_BOARD)
    after_first_move[0][0] = BLACK

    game = GameState.get_new_game_state(SMALL_BOARD)
    for point in ((0, 0), (0, 1)):
        game = go.play_move(game, point)

    recorded = game.history
    self.assertEqual(len(recorded), 2)
    np.testing.assert_array_equal(recorded[0], EMPTY_BOARD)
    np.testing.assert_array_equal(recorded[1], after_first_move)
def test_cannot_capture_own_stone_center(self):
    """A black stone in the centre surrounded by black stones is not removed."""
    black_points = ((4, 4), (4, 5), (4, 3), (3, 4), (5, 4))

    game = GameState.get_new_game_state(SMALL_BOARD)
    for point in black_points:
        game = go.play_move(game, point, BLACK)

    for row, col in black_points:
        self.assertEqual(game.board[row][col], BLACK)
class play_match(object):
    """Drive a match between two players on one shared GameState.

    The state is created from ``save_dir`` and ``size``; after every move the
    state is asked to write itself to disk.
    """

    def __init__(self, player1, player2, save_dir, size=19):
        self.player1 = player1
        self.player2 = player2
        # I Propose that GameState should take a top-level save directory,
        # then automatically generate the specific file name
        self.state = GameState(save_dir, size=size)

    def _play(self, player):
        """Ask ``player`` for a move, apply it, persist the state.

        Returns whatever ``state.do_move`` returned (the end-of-game flag).
        """
        chosen_move = player.get_move(self.state)
        finished = self.state.do_move(chosen_move)
        self.state.write_to_disk()
        return finished

    def play(self):
        """Play one round: player1 moves, then player2 unless the game ended.

        Returns the end-of-game flag of the last move that was played.
        """
        finished = self._play(self.player1)
        return finished if finished else self._play(self.player2)
def get_move(self, game_state: go.GameState):
    """Run every playout in sequence and pick the most visited root action.

    game_state: the current game state; each playout works on its own copy.

    Return: the action whose root child has the highest visit count, or
    PASS_MOVE when the root is still a leaf after the playouts.
    """
    for _ in range(self._n_playout):
        self._playout(game_state.copy())

    root = self._root
    if root.is_leaf():
        return PASS_MOVE

    best_action, _best_child = max(
        root._children.items(),
        key=lambda action_and_child: action_and_child[1]._n_visits,
    )
    return best_action
def test_count_score(self):
    """A full black row 1 against a full white row 2 scores (WHITE, 50.5)."""
    game = GameState.get_new_game_state(SMALL_BOARD)

    for col in range(9):
        game = go.play_move(game, (1, col), BLACK)
    for col in range(9):
        game = go.play_move(game, (2, col), WHITE)

    self.assertEqual(go.get_winner(game), (WHITE, 50.5))
def get_move_probs(self, game_state: go.GameState):
    """Run every playout in sequence and turn root visit counts into probabilities.

    game_state: the current game state; each playout works on its own copy.

    Return: ``(acts, act_probs)`` where ``acts`` is a tuple of the root's
    child actions and ``act_probs`` is the softmax of
    ``log(visits + 1e-10) / self._temperature``.  Returns ``(None, None)``
    when the root is still a leaf after the playouts.
    """
    for _ in range(self._n_playout):
        self._playout(game_state.copy())

    if self._root.is_leaf():
        return None, None

    # Split the root's children into parallel tuples of actions and visits.
    acts, visits = zip(*(
        (act, child._n_visits)
        for act, child in self._root._children.items()
    ))
    log_visits = np.log(np.array(visits) + 1e-10)
    act_probs = softmax(1.0 / self._temperature * log_visits)
    return acts, act_probs
def test_count_score_with_dames(self):
    """Black on row 3 against the stepped white wall below scores (WHITE, 9.5)."""
    black_points = [(3, col) for col in range(9)]
    white_points = ([(4, col) for col in range(4)]
                    + [(5, col) for col in range(3, 9)])

    game = GameState.get_new_game_state(SMALL_BOARD)
    for point in black_points:
        game = go.play_move(game, point, BLACK)
    for point in white_points:
        game = go.play_move(game, point, WHITE)

    self.assertEqual(go.get_winner(game), (WHITE, 9.5))
def policy_value_fn(self, game_state: go.GameState):
    """Evaluate a position with the policy-value network.

    input: the game state to evaluate
    output: a pair ``(act_probs, value)``.  ``act_probs`` is an iterator of
        (action, probability) tuples for every legal move (as the flat index
        ``row * size + col``) followed by ``PASS_MOVE``; ``value`` is the
        network's score for the state, taken from ``value.data[0][0]``.
    """
    # reshape(-1, 2) keeps an empty move list two-dimensional; without it the
    # matrix product below raises ValueError when no legal moves remain.
    coordinates = np.array(game_state.get_legal_moves(False)).reshape(-1, 2)
    legal_moves = (coordinates @ np.array([game_state.size, 1])).tolist()
    # The pass move is read from index size**2 of the network output.
    legal_moves_idx = legal_moves + [game_state.size**2]
    legal_moves = legal_moves + [PASS_MOVE]

    current_input = np.expand_dims(get_current_input(game_state), axis=0)
    net_input = Variable(torch.from_numpy(current_input))
    if self.use_gpu:
        net_input = net_input.cuda()
    log_act_probs, value = self.policy_value_net(net_input.float())
    # The network output is exponentiated, so it is treated as
    # log-probabilities; .cpu() is a no-op for tensors already on the CPU.
    act_probs = np.exp(log_act_probs.data.cpu().numpy()).flatten()

    _act_probs = zip(legal_moves, act_probs[legal_moves_idx])
    value = value.data[0][0]
    return _act_probs, value
def __init__(self, player1, player2, save_dir, size=19):
    """Store both players and create the GameState for the match.

    ``save_dir`` and ``size`` are passed straight through to GameState.
    """
    self.player1 = player1
    self.player2 = player2
    self.state = GameState(save_dir, size=size)
def test_move_cannot_be_placed_on_taken_place(self):
    """Playing on an intersection that already holds a stone must raise."""
    game = GameState.get_new_game_state(SMALL_BOARD)
    game = go.play_move(game, (0, 0))

    # Only the second placement is inside the context manager, so a failure
    # while setting up the position can no longer satisfy the assertion.
    # NOTE(review): Exception is very broad; narrow it to the engine's
    # specific error type once that is confirmed.
    with self.assertRaises(Exception):
        go.play_move(game, (0, 0))
def test_new_game_starts_empty(self):
    """A new game's board equals an all-zero array of shape SMALL_BOARD."""
    fresh_game = GameState.get_new_game_state(SMALL_BOARD)
    all_zero = np.zeros(SMALL_BOARD)
    np.testing.assert_array_equal(fresh_game.board, all_zero)
def __init__(self, state):
    """Initialise with a default-constructed GameState and TreeNode.

    NOTE(review): the ``state`` argument is accepted but never used; a new
    GameState() is created instead. Confirm whether that is intended.
    """
    self.state = GameState()
    self.treenode = TreeNode()
def test_new_game_current_player_black(self):
    """Black is the player to move in a new game."""
    fresh_game = GameState.get_new_game_state(SMALL_BOARD)
    player_to_move = fresh_game.current_player
    self.assertEqual(player_to_move, BLACK)
def test_current_player_alternates(self):
    """The player to move is black at the start and white after one move."""
    fresh_game = GameState.get_new_game_state(SMALL_BOARD)
    self.assertEqual(fresh_game.current_player, BLACK)

    after_first = go.play_move(fresh_game, (0, 0))
    self.assertEqual(after_first.current_player, WHITE)
def test_proper_stone_is_played_if_not_indicated(self):
    """Without an explicit colour, the first stone is black and the second white."""
    game = GameState.get_new_game_state(SMALL_BOARD)
    expected_sequence = (((0, 0), BLACK), ((0, 1), WHITE))

    for (row, col), expected_colour in expected_sequence:
        game = go.play_move(game, (row, col))
        self.assertEqual(game.board[row][col], expected_colour)