예제 #1
0
def simulate(iterations, explore_only = False, save_agent1 = None):
    """
        iterations (int)
        explore_only (bool) - If true, then only explore.
                              Else,  follow an epsilon-greedy policy that lowers the probability to explore over time.
    """

    # Construct game board
    game = Board()
    exploration_probability = 1
    
    # Initialize players
    agent1 = Agent(player1_symbol, game)
    agent2 = Agent(player2_symbol, game)

    # Counters for wins of each agent and total number of games
    nbr_wins_agent1 = 0
    nbr_wins_agent2 = 0
    nbr_games = 0

    # Pick current player
    current_player = player1_symbol

    # Epsilon-greedy 
    exploration_probability = 1.0

    # Start iterations
    for i in tqdm(range(iterations)):

        # Check if games has ended, reset if True
        if game.checkGameEnded():
            nbr_games += 1
            game.resetGame()
            agent1.updatePossibleActions()
            agent2.updatePossibleActions()

        # Check who is the current player 
        if current_player == agent1.symbol:
            a = agent1
        else:
            a = agent2

        # Explore
        if explore_only is True or random.random() < exploration_probability:
            a.performRandomAction(updateQ=True)
        # Exploit
        else:
            best_action = a.getBestAction()
            a.performAction(best_action, updateQ=(eval==False))

        # Reduce probability to explore during training
        # Do not remove completely
        if exploration_probability > 0.2:
            exploration_probability -= 1/iterations

        # Check if there is a winner
        winner = game.checkWinner() # Returns 0 if there is no winner
        if winner != 0:

            # Reset game and retrieve 
            nbr_games += 1
            game.resetGame()

            # Add to count for corresponding winner
            if winner == agent1.symbol:
                nbr_wins_agent1 += 1
            else:
                nbr_wins_agent2 += 1
        
        # Swap player
        if current_player == player1_symbol:
            current_player = player2_symbol
        else:
            current_player = player1_symbol

        
    # Print outcome
    print(nbr_wins_agent1, nbr_wins_agent2, nbr_games)    
    print("Win percentage: Agent 1 {:.2%}, Agent 2 {:.2%}.".format(nbr_wins_agent1/nbr_games, nbr_wins_agent2/nbr_games))

    if save_agent1 is not None:
        print("Saved trainer of agent 1 to {}".format(save_agent1))
        agent1.saveTrainer(save_agent1)

    # Return agents
    return agent1, agent2
예제 #2
0
class Application(tk.Frame):
    def __init__(self, master=None):
        tk.Frame.__init__(self, master)
        self.master.title("Tic-tac-toe")
        self.master.minsize(300, 300)
    
        # Start game
        self.board = Board(rows=3, cols=3, win_threshold=3)
        self.current_player = playerX if (random.random() < 0.5) else playerO
        
        # Train agent and assign it to this game
        print("Preparing agent")
        self.agent = Agent(playerX, self.board, load_trainer=agent_save)
        self.agent_symbol = self.agent.symbol

        # Create widgets
        self.createBoard(self.board)

        if self.current_player == self.agent_symbol:
            self.agentMove()

        self.pack(fill="both")

    def createBoard(self, board):

        # Text label
        self.info_label = tk.Label(self, text = "Player {}'s turn".format("X" if self.current_player == playerX else "O") )


        # Create buttons
        self.button00 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(0,0))
        self.button01 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(0,1))
        self.button02 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(0,2))

        self.button10 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(1,0))
        self.button11 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(1,1))
        self.button12 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(1,2))

        self.button20 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(2,0))
        self.button21 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(2,1))
        self.button22 = tk.Button(self, height = 4, width = 8, text=' ', font='Times 20 bold', bg='gray', fg='white', command = lambda : self.playMove(2,2))

        self.reset_button = tk.Button(self, text = "Reset", command = self.resetGame)
        self.Q_button = tk.Button(self, text="Display Q values", command = self.displayQ, relief='raised')

        # Insert to grid
        self.info_label.grid(row = 0, column = 1)

        self.button00.grid(row = 1, column = 0)
        self.button01.grid(row = 1, column = 1)
        self.button02.grid(row = 1, column = 2)

        self.button10.grid(row = 2, column = 0)
        self.button11.grid(row = 2, column = 1)
        self.button12.grid(row = 2, column = 2)
        
        self.button20.grid(row = 3, column = 0)
        self.button21.grid(row = 3, column = 1)
        self.button22.grid(row = 3, column = 2)

        self.reset_button.grid(row = 4, column = 1)
        self.Q_button.grid(row = 4, column = 2)


    def getText(self, x, y):
        value = self.board.getPosition(x, y)
        if value == playerX:
            return "X"
        elif value == playerO:
            return "O"
        else:
            return " "
    
    def getQValue(self, x, y):
        value = self.getText(x,y)
        if value == " ":
            action_hash = self.agent.getActionHash(np.asarray([x,y]))
            state_hash = self.board.getStateHash(inverted=True)
            Q = self.agent.trainer.getValueQ(state_hash, action_hash)
            return "{:.2f}".format(Q)
        else:
            return value

    def updateText(self):
        
        self.info_label.configure(text="Player {}'s turn".format("X" if self.current_player == playerX else "O"))

        self.button00.configure(text=self.getText(0,0), state=tk.ACTIVE)
        self.button01.configure(text=self.getText(0,1), state=tk.ACTIVE)
        self.button02.configure(text=self.getText(0,2), state=tk.ACTIVE)
        self.button10.configure(text=self.getText(1,0), state=tk.ACTIVE)
        self.button11.configure(text=self.getText(1,1), state=tk.ACTIVE)
        self.button12.configure(text=self.getText(1,2), state=tk.ACTIVE)
        self.button20.configure(text=self.getText(2,0), state=tk.ACTIVE)
        self.button21.configure(text=self.getText(2,1), state=tk.ACTIVE)
        self.button22.configure(text=self.getText(2,2), state=tk.ACTIVE)

        self.reset_button.configure(state=tk.ACTIVE)

    def displayQ(self):

        if self.Q_button.config('relief')[-1] == 'sunken':
            self.updateText()
            self.Q_button.configure(text="Display Q values", relief='raised')
        else:
            self.button00.configure(text=self.getQValue(0,0), state=tk.DISABLED)
            self.button01.configure(text=self.getQValue(0,1), state=tk.DISABLED)
            self.button02.configure(text=self.getQValue(0,2), state=tk.DISABLED)
            self.button10.configure(text=self.getQValue(1,0), state=tk.DISABLED)
            self.button11.configure(text=self.getQValue(1,1), state=tk.DISABLED)
            self.button12.configure(text=self.getQValue(1,2), state=tk.DISABLED)
            self.button20.configure(text=self.getQValue(2,0), state=tk.DISABLED)
            self.button21.configure(text=self.getQValue(2,1), state=tk.DISABLED)
            self.button22.configure(text=self.getQValue(2,2), state=tk.DISABLED)
            self.reset_button.configure(state=tk.DISABLED)

            self.Q_button.configure(text="Hide Q values", relief='sunken')




    def playMove(self, x, y):
        
        # Raise error because position is non-empty
        if self.board.getPosition(x,y) == playerX or self.board.getPosition(x,y) == playerO:
            print("Error: This action is not possible, pick another position.")
        else:
            # Update board
            self.board.setPosition(x, y, self.current_player)

            # Swap player
            if self.current_player == playerX:
                self.current_player = playerO
            else:
                self.current_player = playerX

            # Update text
            self.updateText()

            # Check if game has ended
            winner = self.board.checkWinner()
            if winner != 0 or self.board.checkGameEnded():
                self.endGame()

            elif self.current_player == self.agent_symbol:
                self.agentMove()

    def agentMove(self):
        move = self.agent.getBestAction()
        time.sleep(random.random()*1 + 0.5)
        self.playMove(move[0], move[1])

    def endGame(self):
        winner = self.board.checkWinner()
        if winner == playerX:
            self.info_label.configure(text = "Player X won!") 
        elif winner == playerO:
            self.info_label.configure(text = "Player O won!") 
        else:
            self.info_label.configure(text = "Game tied") 

    def resetGame(self):
        self.board.resetGame()
        self.current_player = playerX if (random.random() < 0.5) else playerO
        if self.current_player == self.agent_symbol:
            self.agentMove()
        self.updateText()