def test_opposing_duo():
    '''A sanity test for a look-ahead agent'''
    # _._._._.
    # ._._._._
    # _._._._.
    # ._._._b_
    # _._._._.
    # ._._._w_
    # _._._._.
    # ._._._._
    board = Checkers.empty_board()
    board['black']['men'].add(15)
    board['white']['men'].add(23)
    ch = Checkers(board=board, turn='black')
    black_player = MinimaxPlayer('black', search_depth=2)
    from_sq, to_sq = black_player.next_move(ch.board, ch.last_moved_piece)
    assert to_sq == 19, 'Should move to the edge for safety.'
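
The board diagrams in these tests index the 32 dark squares 0-31, four per row from the top left (so Black's man sits on square 15 and White's on 23 above). A hypothetical helper, inferred from the diagrams rather than taken from the library, that prints this layout:

def print_square_numbers():
    # Dark squares carry the indices; light squares are never played
    for row in range(8):
        cells = []
        for col in range(8):
            if (row + col) % 2 == 1:
                cells.append('%2i' % (row * 4 + col // 2))
            else:
                cells.append(' .')
        print(' '.join(cells))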
    # Constructor fragment of a player agent (assumes numpy is imported as np)
    def __init__(self, color, seed=None):
        assert color in Checkers.all_players, '`color` must be in %r.' % Checkers.all_players

        # Which side is being played
        self.color = color
        # Internal simulator for rollouts
        self.simulator = Checkers()
        # Fixing the random state for easy replications
        self.random = np.random.RandomState(seed=seed)
Example #3
    def successor(st):
        sim = Checkers()
        state = MctsPlayer.convert_to_state(st)
        sim.restore_state(state)
        next_sts = []
        moves = sorted(sim.legal_moves())
        for move in moves:
            # Reset to the parent state before simulating each move
            sim.restore_state(state)
            board, turn, last_moved_piece, _, _ = sim.move(*move)
            next_state = board, turn, last_moved_piece
            next_st = MctsPlayer.immutable_state(*next_state)
            next_sts.append(next_st)
        return next_sts
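
A quick usage sketch: enumerate the children of the starting position (assuming, as the main blocks further below suggest, that Checkers() starts from the standard opening):

sim = Checkers()
st0 = MctsPlayer.immutable_state(sim.board, sim.turn, sim.last_moved_piece)
children = MctsPlayer.successor(st0)
print(len(children))  # one immutable successor state per legal opening move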
Example #4
def test_single_piece_moved():
    '''Only one piece is allowed to move within one side's turn'''
    # _b_._._.
    # ._w_b_._
    # _._._w_.
    # ._._w_._
    # _._._._.
    # ._._._._
    # _._._._.
    # ._._._._
    board = Checkers.empty_board()
    board['black']['men'].update([0, 6])
    board['white']['men'].update([5, 5 + 5, 14])
    ch = Checkers(board=board)
    state = ch.save_state()
    ch.move(0, 9)
    assert ch.turn == 'black' and ch.last_moved_piece == 9, 'Black still has more jumps.'
    ch.restore_state(state)
    ch.move(6, 15)
    assert ch.turn == 'white', 'Black finished its turn.'
Example #5
def test_multi_jump():
    '''Handling a multi-jump'''
    # _._b_._.
    # ._w_._._
    # _._._._.
    # ._w_._._
    # _._._._.
    # ._w_._._
    # _._._._.
    # ._._._._
    board = Checkers.empty_board()
    board['black']['men'].add(1)
    board['white']['men'].update([5, 5 + 4 * 2, 5 + 4 * 4])
    ch = Checkers(board=board)
    assert len(ch.legal_moves()) == 1, 'Should only have one legal move.'
    assert ch.legal_moves()[0] == (1, 8), 'The only legal move should be (1, 8).'
    ch.move(1, 8)
    assert ch.turn == 'black', "Black still has more jumps, so it is Black's turn."
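
The capture chain is forced all the way down the board. A hedged continuation of the test (the square numbers follow the 0-31 layout sketched earlier, and we assume move() reports the winner once the last opposing piece is captured):

    ch.move(8, 17)
    assert ch.turn == 'black', "A third jump is forced, so it is still Black's turn."
    board, turn, last_moved_piece, moves, winner = ch.move(17, 24)
    assert winner == 'black', 'Capturing the last white man should end the game.'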
Example #6
    def rollout(self, st):
        '''Rollout till the game ends with a win/draw'''
        sim = Checkers()
        state = MctsPlayer.convert_to_state(st)
        sim.restore_state(state)
        ply = 0
        moves = sim.legal_moves()
        # Check for a terminal state
        if len(moves) == 0:
            # The player to move has no legal moves, so the opponent wins
            winner = 'white' if st[1] == 'black' else 'black'
        else:
            winner = None
        while ply < self.max_plies and winner is None:
            from_sq, to_sq = self.rollout_policy(moves)
            board, turn, last_moved_piece, moves, winner = sim.move(from_sq, to_sq, skip_check=True)
            ply += 1
        # Returns the winner, or None for a draw
        return winner, ply
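
rollout_policy is not shown in these examples; a minimal default policy, assuming it simply samples uniformly among the legal moves with the player's seeded RandomState:

    def rollout_policy(self, moves):
        # Uniform random default policy over the legal moves
        return moves[self.random.randint(len(moves))]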
Example #7
    def immutable_state(board, turn, last_moved_piece):
        return Checkers.immutable_board(board), turn, last_moved_piece
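
Immutable states exist so positions can be used as dictionary keys (self.children and self.stats in next_move below). Neither Checkers.immutable_board nor the inverse convert_to_state is shown here; one hypothetical pairing, assuming the board freezes into a tuple of frozensets in a fixed (color, piece-kind) order and that the board tracks 'kings' alongside 'men':

    def immutable_board(board):
        # Hypothetical: freeze each piece set in a fixed order so the result is hashable
        return tuple(frozenset(board[color][kind])
                     for color in ('black', 'white')
                     for kind in ('men', 'kings'))

    def convert_to_state(st):
        frozen_board, turn, last_moved_piece = st
        keys = [('black', 'men'), ('black', 'kings'), ('white', 'men'), ('white', 'kings')]
        board = Checkers.empty_board()
        for (color, kind), pieces in zip(keys, frozen_board):
            board[color][kind].update(pieces)
        return board, turn, last_moved_piece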
Example #8


if __name__ == '__main__':
    from functools import partial
    from checkers.agents.baselines import play_a_game, RandomPlayer
    # from checkers.agents.baselines import keyboard_player_move

    # A few matches against a random player
    max_game_len = 200
    n_matches = 1
    n_wins, n_draws, n_losses = 0, 0, 0
    for i in range(n_matches):
        print('game', i)
        ch = Checkers()
        black_player = MinimaxPlayer(
            'black',
            value_func=partial(first_order_adv, 'black', 200, 100, 20, 0),
            # The provided legal moves might be ordered differently
            rollout_order_gen=lambda x: sorted(x),
            search_depth=4,
            seed=i)
        # Alternative opponent: a depth-4 minimax player (uncomment to use instead of the random player below)
        # white_player = MinimaxPlayer('white', value_func=partial(material_value_adv, 'white', 2, 1), search_depth=4, seed=i * 2)
        white_player = RandomPlayer('white', seed=i * 2)
        winner = play_a_game(ch, black_player.next_move, white_player.next_move, max_game_len)
        # Play with a minimax player
        # play_a_game(ch, keyboard_player_move, white_player.next_move)
        print('black player evaluated %i positions in %.2fs (avg %.2f positions/s) '
              'effective branching factor %.2f' % (
                  black_player.n_evaluated_positions,
                  black_player.evaluation_dt,
                  black_player.n_evaluated_positions / black_player.evaluation_dt,
                  (black_player.n_evaluated_positions / black_player.ply) ** (1 / black_player.search_depth)))
        print('black player pruned', black_player.prunes.items())
        print()
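
first_order_adv and material_value_adv are imported from elsewhere in the repo and not shown in these examples. A plausible minimal material evaluator, assuming the partial binds (color, king_weight, man_weight), that the remaining argument is the board, and that the board keeps a 'kings' set alongside 'men':

def material_value_adv(color, king_weight, man_weight, board):
    # Weighted material difference from `color`'s point of view
    opponent = 'white' if color == 'black' else 'black'
    def value(side):
        return (king_weight * len(board[side]['kings'])
                + man_weight * len(board[side]['men']))
    return value(color) - value(opponent)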
Example #9
    def next_move(self, board, last_moved_piece):
        # Initialize with root node
        st0 = MctsPlayer.immutable_state(board, self.color, last_moved_piece)

        round_idx = 0
        while round_idx < self.max_rounds:
            # Start from the root
            st = st0
            walked_sts = [st]

            # Selection
            # XXX the search could get stuck in a loop here (when only a few distinct successors are left)
            succ_sts = MctsPlayer.successor(st)
            while 0 < len(succ_sts) and len(succ_sts) == len(self.children[st]):
                # Not a terminal state and all children are expanded
                # Use the tree policy: choose the successor maximizing Q_hat + UCB (see the UCB1 sketch after this function)
                max_score = float('-inf')
                max_st = None
                turn = st[1]
                for next_st in self.children[st]:
                    # Upper confidence bound
                    next_q = self.q(turn, next_st) + self.exploration_coeff * MctsPlayer.ucb(self.stats[st][-1], self.stats[next_st][-1])
                    if max_score < next_q:
                        max_score = next_q
                        max_st = next_st
                st = max_st
                # Loop detection
                if st in walked_sts:
                    break
                # Add it to walked states in this round
                walked_sts.append(st)
                succ_sts = MctsPlayer.successor(st)

            # Expansion
            # st has at least one unexpanded successor: choose one at random
            succ_sts = MctsPlayer.successor(st)
            if 0 < len(succ_sts):
                # Not a terminal state
                next_idx = self.random.randint(len(succ_sts))
                next_st = succ_sts[next_idx]
                walked_sts.append(next_st)
                # Add this node to the tree
                self.children[st].add(next_st)
                st = next_st

            # Simulation
            # Rollout till the game ends with a default policy
            winner, ply = self.rollout(st)

            # Back-propagation
            # Update statistics on the walked nodes
            reward = self.discount ** ply
            for st in reversed(walked_sts):
                black_wins, white_wins, n_samples = self.stats[st]
                # Credit the discounted reward to the winning side
                black_wins += reward if winner == 'black' else 0
                white_wins += reward if winner == 'white' else 0
                self.stats[st] = black_wins, white_wins, n_samples + 1
                # Discounting reduces the influence of wins from very long (lucky) random rollouts
                reward *= self.discount
            round_idx += 1

        # Select a move after searching
        # print(len(self.children[st0]))
        sim = Checkers()
        state = MctsPlayer.convert_to_state(st0)
        sim.restore_state(state)
        moves = sorted(sim.legal_moves())
        # Q-maximizing move
        max_q, max_q_move = float('-inf'), None
        # Visit-maximizing move
        max_n, max_n_move = float('-inf'), None
        for move in moves:
            sim.restore_state(state)
            board, turn, last_moved_piece, _, _ = sim.move(*move)
            next_st = MctsPlayer.immutable_state(board, turn, last_moved_piece)
            if next_st in self.children[st0]:
                # Maximize Q for the player
                next_q = self.q(self.color, next_st)
                if max_q < next_q:
                    max_q = next_q
                    max_q_move = move
                n_samples = self.stats[next_st][-1]
                if max_n < n_samples:
                    max_n = n_samples
                    max_n_move = move
                # print(move, '%.2f' % next_q, n_samples, next_st[1], self.stats[next_st], self.stats[next_st][0] + self.stats[next_st][1] - self.stats[next_st][-1])
        # Print some statistics
        print('V_hat(player) - V_hat(opponent) = %.2f' % (self.q(self.color, st0) - self.q('white' if self.color == 'black' else 'black', st0)))
        leaf_counts = MctsPlayer.hist_leaf_depth(self.children, st0)
        print('leaf depth histogram (depth, count):', sorted(leaf_counts.items()), 'max depth', max(leaf_counts.keys()))
        return max_q_move
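
Neither MctsPlayer.ucb nor self.q is shown in these examples. Standard choices consistent with the call sites above would be a UCB1-style exploration term over parent/child visit counts and a per-side win rate over the accumulated stats; a sketch, with the exact formulas assumed rather than confirmed:

    # (methods on MctsPlayer; requires `import math` at module level)
    @staticmethod
    def ucb(parent_n, child_n):
        # UCB1-style exploration bonus; +1 guards against log(0) and division by zero
        return math.sqrt(math.log(parent_n + 1) / (child_n + 1))

    def q(self, color, st):
        # Estimated (discounted) win rate for `color` at state `st`
        black_wins, white_wins, n_samples = self.stats[st]
        wins = black_wins if color == 'black' else white_wins
        return wins / n_samples if n_samples > 0 else 0.0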

# NOTE: the signature and setup below are reconstructed from the call sites;
# the original header of play_a_game was lost, so the parameter names are assumptions.
def play_a_game(checkers, black_player_move, white_player_move, max_plies=float('inf')):
    players = {'black': black_player_move, 'white': white_player_move}
    board, turn, last_moved_piece = checkers.board, checkers.turn, checkers.last_moved_piece
    moves = checkers.legal_moves()
    ply, tot_moves = 0, 0
    winner = None
    while winner is None and ply < max_plies:
        tot_moves += len(moves)
        # The current game state
        checkers.print_board()
        print(ply, 'turn:', turn, 'last_moved_piece:', last_moved_piece)
        print('%i legal moves %r' % (len(moves), moves))
        # Select a legal move for the current player
        from_sq, to_sq = players[turn](board, last_moved_piece)
        print(turn, 'moved %i, %i' % (from_sq, to_sq))
        print()
        # Update the game
        board, turn, last_moved_piece, moves, winner = checkers.move(from_sq, to_sq)
        ply += 1
    if winner is None:
        print('draw')
    else:
        print('%s player wins' % winner)
    print('total legal moves', tot_moves, 'avg branching factor', tot_moves / ply)
    return winner


if __name__ == '__main__':
    ch = Checkers()
    ch.print_empty_board()

    black_random_player = RandomPlayer('black', seed=0)
    white_random_player = RandomPlayer('white', seed=1)
    play_a_game(ch, black_random_player.next_move, white_random_player.next_move)
    # play_a_game(ch, keyboard_player_move, keyboard_player_move)