Ejemplo n.º 1
0
def play_tictactoe_turn_training(Q, state):
    """
        Play one training turn of tic tac toe and learn from it.

        * * * MUTATES THE Q MODEL IN PLACE. * * *

        The position is visited four times; between visits it is replaced
        by get_rotated_board_state(state), so each visit trains Q on a
        different rotation of the board.  On every visit a move is chosen
        epsilon-greedily, played, and used to update Q[state][action].

        Returns the state produced by the move played on the last visit
        (the new board state together with the next person's turn).
    """
    for _ in range(4):
        rewards = compute_R(state)

        # Epsilon-greedy choice: explore with probability EPSILON,
        # otherwise follow what Q currently suggests.
        exploring = random.uniform(0, 1) < EPSILON
        if exploring:
            _, board = state
            action = pick_random_move(board)
        else:
            action = suggest_move(Q, state)

        next_state = play_tictactoe_turn(action, state)

        # Q-learning update: blend the previous estimate with the reward
        # plus the discounted best value available from the next state.
        previous_estimate = Q[state][action]
        learned_value = rewards[action] + GAMMA * max(Q[next_state])
        Q[state][action] = ((1 - LEARNING_RATE) * previous_estimate +
                            LEARNING_RATE * learned_value)

        # Move on to the next rotation of the same position.
        state = get_rotated_board_state(state)

    return next_state
Ejemplo n.º 2
0
    def unit_test(first, AI, starting_percent=0):
        """
            Play number_of_games games against the Q model and tally the
            outcomes in the record dictionary.

            Example call:
                unit_test(True, True, 0)

            first:
                True/False, the player who makes the first move.
            AI:
                Which player uses the Q model.  A player whose turn value
                equals AI (or any player, when AI equals `both`) moves via
                suggest_move; every other move is picked at random.
            starting_percent:
                0/50, offset added to the progress percentage that is
                printed, so two consecutive runs can report 0-50 and
                50-100.

            Side effects:
                Increments record[first][AI][winner] once per game and
                prints a progress line at every whole percent of the run.
        """
        for game in range(number_of_games):
            board_state = (None,) * 9
            turn = first
            winner = None
            state = (turn, board_state)

            # Play one full match.
            while winner is None:
                if AI == turn or AI == both:
                    action = suggest_move(Q, state)
                else:
                    action = pick_random_move(state[1])

                state = play_tictactoe_turn(action, state)
                turn, board_state = state
                winner = check_winner(board_state)

            # Record the outcome.
            record[first][AI][winner] += 1

            # Show progress.  The 1% boundary is tested with integers:
            # the float form `fraction % .01` is non-zero for most exact
            # steps (e.g. 0.03 % 0.01), which silently skipped updates.
            if (game * 100) % number_of_games == 0:
                fraction = game / number_of_games
                print(fraction * 50 + starting_percent, "% done.")
Ejemplo n.º 3
0
def generate_initial_Q(num_games=100000):
    """
        This builds the initial brain or 'Q'.

        Returns a dictionary of states associated with an array of actions.
        All actions are set to an initial value of zero.

        'Q' stands for 'Quality'.

    num_games:
          number of random games played to discover states
          (defaults to 100000, the value previously hard-coded).

    dictionary of states:
          state = (turn, board_state)

    associated with actions:
          actions = [0,0,0,0,0,0,0,0,0]

    Q = { state: actions }
    """

    Q = {}

    # Seed both possible opening positions (either player to move on an
    # empty board).  Each state gets its own fresh list of action values.
    empty_board = (None,) * 9
    for turn in (True, False):
        Q[(turn, empty_board)] = [0] * 9

    # Play enough random games to generate all states.
    for _ in range(num_games):

        state, winner, _ = reset_game()

        while winner is None:

            _, board_state = state
            winner = check_winner(board_state)

            # NOTE(review): the winner is checked before the move is made,
            # so one more move is still played after a game has been
            # decided.  Kept as-is because the states this adds to Q may
            # be relied on elsewhere; confirm before reordering.
            action = pick_random_move(board_state)
            state = play_tictactoe_turn(action, state)

            if state not in Q:
                Q[state] = [0] * 9

            add_board_rotations_to_Q(state, Q)

    return Q
Ejemplo n.º 4
0
 def test_play_tictactoe_turn(self):
     """Playing square 3 fills that square and hands the turn over."""
     state = (False, (0, 1, 1, None, None, None, None, None, 0))
     action = 3
     # The returned state carries the next person's turn, so the turn
     # flag is expected to flip from False to True.
     return_state = (True, (0, 1, 1, 0, None, None, None, None, 0))
     # Compare by value: `is` tests object identity, which can never hold
     # for a tuple freshly built by the function under test.
     assert play_tictactoe_turn(action, state) == return_state
Ejemplo n.º 5
0



import sys
sys.path.append('../')
from functions.play import play_tictactoe_turn


# Manual smoke check: play square 3 from a fixed position and print the
# state that comes back.
action = 3
state = (
    False,
    (0, 1, 1, None, None, None, None, None, 0),
)

# State the move is expected to produce; kept for eyeball comparison with
# the printed output (it is not checked programmatically here).
return_state = (
    True,
    (0, 1, 1, 0, None, None, None, None, 0),
)

print(play_tictactoe_turn(action, state))