def test_opposing_duo():
    '''A sanity test for a look-ahead agent'''
    # Board layout (b = black man, w = white man):
    # _._._._.
    # ._._._._
    # _._._._.
    # ._._._b_
    # _._._._.
    # ._._._w_
    # _._._._.
    # ._._._._
    board = Checkers.empty_board()
    board['black']['men'].add(15)
    board['white']['men'].add(23)
    game = Checkers(board=board, turn='black')
    agent = MinimaxPlayer('black', search_depth=2)
    # With depth-2 look-ahead, black should see the threat and retreat to the edge
    _origin, landing = agent.next_move(game.board, game.last_moved_piece)
    assert landing == 19, 'Should move to the edge for safety.'
def __init__(self, color, seed=None):
    '''Set up an agent playing one side, with a private simulator and a seeded RNG.'''
    assert color in Checkers.all_players, '`color` must be in %r.' % Checkers.all_players
    # The side this agent plays
    self.color = color
    # Seeded RNG so experiments can be replicated exactly
    self.random = np.random.RandomState(seed=seed)
    # Private game simulator used for rollouts
    self.simulator = Checkers()
def successor(st):
    '''Return the immutable successor states of `st`, one per legal move (sorted order).'''
    sim = Checkers()
    state = MctsPlayer.convert_to_state(st)
    sim.restore_state(state)
    successors = []
    for move in sorted(sim.legal_moves()):
        # Reset the simulator before applying each candidate move
        sim.restore_state(state)
        board, turn, last_moved_piece, _, _ = sim.move(*move)
        successors.append(MctsPlayer.immutable_state(board, turn, last_moved_piece))
    return successors
def test_single_piece_moved():
    '''Only one piece is allowed to move within a single side's turn'''
    # Board layout (b = black man, w = white man):
    # _b_._._.
    # ._w_b_._
    # _._._w_.
    # ._._w_._
    # _._._._.
    # ._._._._
    # _._._._.
    # ._._._._
    board = Checkers.empty_board()
    board['black']['men'].update([0, 6])
    board['white']['men'].update([5, 10, 14])
    game = Checkers(board=board)
    snapshot = game.save_state()
    # Jumping 0 -> 9 leaves a further capture available, so black keeps the turn
    game.move(0, 9)
    assert game.turn == 'black' and game.last_moved_piece == 9, 'Black still have more jumps.'
    # Rewind and move the other piece instead: the turn passes to white
    game.restore_state(snapshot)
    game.move(6, 15)
    assert game.turn == 'white', 'Black finished its turn.'
def test_multi_jump():
    '''Handling a multi-jump'''
    # Board layout (b = black man, w = white man):
    # _._b_._.
    # ._w_._._
    # _._._._.
    # ._w_._._
    # _._._._.
    # ._w_._._
    # _._._._.
    # ._._._._
    board = Checkers.empty_board()
    board['black']['men'].add(1)
    board['white']['men'].update([5, 13, 21])
    game = Checkers(board=board)
    # Captures are mandatory, so the jump is the only option
    assert len(game.legal_moves()) == 1, 'Should only have one legal move.'
    assert game.legal_moves()[0] == (1, 8), 'The only legal move should be (1, 8).'
    game.move(1, 8)
    # The same piece must continue the jump sequence
    assert game.turn == 'black', "Black still have more jumps so it is Black's turn."
def rollout(self, st):
    '''Rollout from `st` until a win or the ply limit (treated as a draw).

    Returns (winner, ply) where winner is None on a draw.
    '''
    sim = Checkers()
    state = MctsPlayer.convert_to_state(st)
    sim.restore_state(state)
    moves = sim.legal_moves()
    # A side with no legal moves has lost; st[1] is the side to move
    winner = None if moves else ('white' if st[1] == 'black' else 'black')
    ply = 0
    while winner is None and ply < self.max_plies:
        move = self.rollout_policy(moves)
        _, _, _, moves, winner = sim.move(*move, skip_check=True)
        ply += 1
    return winner, ply
def immutable_state(board, turn, last_moved_piece):
    '''Bundle a game position into a hashable tuple: (immutable board, turn, last moved piece).'''
    frozen_board = Checkers.immutable_board(board)
    return frozen_board, turn, last_moved_piece
''' # TODO pass if __name__ == '__main__': from checkers.agents.baselines import play_a_game, RandomPlayer # from checkers.agents.baselines import keyboard_player_move # A few matches against a random player max_game_len = 200 n_matches = 1 n_wins, n_draws, n_losses = 0, 0, 0 for i in range(n_matches): print('game', i) ch = Checkers() black_player = MinimaxPlayer( 'black', value_func=partial(first_order_adv, 'black', 200, 100, 20, 0), # The provided legal moves might be ordered differently rollout_order_gen=lambda x: sorted(x), search_depth=4, seed=i) white_player = MinimaxPlayer('white', value_func=partial(material_value_adv, 'white', 2, 1), search_depth=4, seed=i * 2) white_player = RandomPlayer('white', seed=i * 2) winner = play_a_game(ch, black_player.next_move, white_player.next_move, max_game_len) # Play with a minimax player # play_a_game(ch, keyboard_player_move, white_player.next_move) print('black player evaluated %i positions in %.2fs (avg %.2f positions/s) effective branching factor %.2f' % (black_player.n_evaluated_positions, black_player.evaluation_dt, black_player.n_evaluated_positions / black_player.evaluation_dt, (black_player.n_evaluated_positions / black_player.ply) ** (1 / black_player.search_depth))) print('black player pruned', black_player.prunes.items()) print()
def next_move(self, board, last_moved_piece):
    '''Run `max_rounds` rounds of MCTS from the given position and return the
    (from_sq, to_sq) move whose successor state has the highest estimated
    Q-value for this player.

    Each round performs the four classic MCTS phases: selection (UCB tree
    policy), expansion (one random unexpanded child), simulation (random
    rollout) and back-propagation (discounted win statistics).
    '''
    # Initialize with root node
    st0 = MctsPlayer.immutable_state(board, self.color, last_moved_piece)
    round = 0
    while round < self.max_rounds:
        # Start from the root
        st = st0
        walked_sts = [st]
        # Selection phase
        # XXX the search could get stuck in a cycle here (when very few
        # unvisited successors are left) — hence the loop detection below
        succ_sts = MctsPlayer.successor(st)
        while 0 < len(succ_sts) and len(succ_sts) == len(self.children[st]):
            # Not a terminal state and all children are expanded:
            # follow the tree policy, choosing the successor maximizing Q_hat + UCB
            max_score = float('-inf')
            max_st = None
            turn = st[1]  # the side to move at `st`
            for next_st in self.children[st]:
                # Q-value plus the upper-confidence exploration bonus
                next_q = self.q(turn, next_st) + self.exploration_coeff * MctsPlayer.ucb(self.stats[st][-1], self.stats[next_st][-1])
                if max_score < next_q:
                    max_score = next_q
                    max_st = next_st
            st = max_st
            # Loop detection: stop if this state was already visited this round
            if st in walked_sts:
                break
            # Add it to walked states in this round
            walked_sts.append(st)
            succ_sts = MctsPlayer.successor(st)
        # Expansion phase
        # Not an internal node: choose an unexpanded successor state randomly
        succ_sts = MctsPlayer.successor(st)
        if 0 < len(succ_sts):
            # Not a terminal state
            next_idx = self.random.randint(len(succ_sts))
            next_st = succ_sts[next_idx]
            walked_sts.append(next_st)
            # Add this node to the tree
            self.children[st].add(next_st)
            st = next_st
        # Simulation phase
        # Rollout till the game ends (or the ply limit) with the default policy
        winner, ply = self.rollout(st)
        # Back-propagation phase
        # Update (black_wins, white_wins, n_samples) on every walked node
        reward = self.discount ** ply
        for st in reversed(walked_sts):
            black_wins, white_wins, n_samples = self.stats[st]
            turn = st[1]
            # Credit the discounted reward to the winning side only
            black_wins += reward if winner == 'black' else 0
            white_wins += reward if winner == 'white' else 0
            self.stats[st] = black_wins, white_wins, n_samples + 1
            # This reduces the influence of lucky rollouts due to very long random rollout
            reward *= self.discount
        round += 1
    # Select a move after searching
    # print(len(self.children[st0]))
    sim = Checkers()
    state = MctsPlayer.convert_to_state(st0)
    sim.restore_state(state)
    moves = sorted(sim.legal_moves())
    # Q-maximizing move
    max_q, max_q_move = float('-inf'), None
    # Visit-maximizing move (tracked for inspection; the Q-maximizing move is returned)
    max_n, max_n_move = float('-inf'), None
    for move in moves:
        sim.restore_state(state)
        board, turn, last_moved_piece, _, _ = sim.move(*move)
        next_st = MctsPlayer.immutable_state(board, turn, last_moved_piece)
        if next_st in self.children[st0]:
            # Maximize Q for the player
            next_q = self.q(self.color, next_st)
            if max_q < next_q:
                max_q = next_q
                max_q_move = move
            n_samples = self.stats[next_st][-1]
            if max_n < n_samples:
                max_n = n_samples
                max_n_move = move
            # print(move, '%.2f' % next_q, n_samples, next_st[1], self.stats[next_st], self.stats[next_st][0] + self.stats[next_st][1] - self.stats[next_st][-1])
    # Print some statistics
    print('V_hat(player) - V_hat(opponent) = %.2f' % (self.q(self.color, st0) - self.q('white' if self.color == 'black' else 'black', st0)))
    leaf_counts = MctsPlayer.hist_leaf_depth(self.children, st0)
    print('leaf depth histogram (depth, count):', sorted(leaf_counts.items()), 'max depth', max(leaf_counts.keys()))
    return max_q_move
    # NOTE(review): this is the tail of play_a_game; its signature and the
    # initialization of `checkers`, `board`, `turn`, `last_moved_piece`,
    # `moves`, `ply`, `tot_moves`, `players` and `max_plies` are outside
    # this view — confirm against the full file.
    winner = None
    # Alternate turns until someone wins or the ply limit (a draw) is reached
    while winner is None and ply < max_plies:
        tot_moves += len(moves)
        # Show the current game state
        checkers.print_board()
        print(ply, 'turn:', turn, 'last_moved_piece:', last_moved_piece)
        print('%i legal moves %r' % (len(moves), moves))
        # Select a legal move for the current player
        from_sq, to_sq = players[turn](board, last_moved_piece)
        print(turn, 'moved %i, %i' % (from_sq, to_sq))
        print()
        # Update the game
        board, turn, last_moved_piece, moves, winner = checkers.move(from_sq, to_sq)
        ply += 1
    if winner is None:
        print('draw')
    else:
        print('%s player wins' % winner)
    print('total legal moves', tot_moves, 'avg branching factor', tot_moves / ply)
    return winner


if __name__ == '__main__':
    # Demo game between two seeded random players
    ch = Checkers()
    ch.print_empty_board()
    black_random_player = RandomPlayer('black', seed=0)
    white_random_player = RandomPlayer('white', seed=1)
    play_a_game(ch, black_random_player.next_move, white_random_player.next_move)
    # play_a_game(ch, keyboard_player_move, keyboard_player_move)