import numpy as np
import tkinter as tk
import copy
import pickle
from Q_Learning_Tic_Tac_Toe import Game, QPlayer, RandomPlayer  # Classes used for Tic Tac Toe

# Training script: a QPlayer ("X") plays N_episodes games against a
# RandomPlayer ("O"); afterwards the Q object held by the game is pickled.
root = tk.Tk()
epsilon = 2e-2  # passed to QPlayer as `epsilon`
lr = 10e-3  # passed to QPlayer as `learningRate`
# TODO: add further QPlayer parameters here.
player1 = QPlayer(mark="X", epsilon=epsilon, learningRate=lr)
player2 = RandomPlayer(mark="O")
# A previous revision hinted at extra Game arguments
# (board_size=5, streak_size=4); they are not passed here.
game = Game(root, player1, player2)

N_episodes = 1000
for episodes in range(N_episodes):
    # One full game, then reset the game so the next iteration starts fresh.
    game.play()
    game.reset()

Q = game.Q

# NOTE(review): the file name hard-codes "epsilon_09" although epsilon is
# 2e-2 in this script; the name is kept unchanged because other scripts may
# load the file by this exact name.
filename = "Q_epsilon_09_Nepisodes_{}.p".format(N_episodes)
# Use a context manager so the file is flushed and closed deterministically
# instead of relying on garbage collection of an anonymous file object.
with open(filename, "wb") as q_file:
    pickle.dump(Q, q_file)
# Example no. 2
0
# Evaluation script: two QPlayers sharing a previously pickled Q play
# N_episodes games; one result per game is written to column 1 of the
# worksheet (0 = no winner, 1 = "X", -1 = "O").
# `wb` is expected to be a workbook created earlier in this script.
ws1 = wb.active
ws1.title = "Training Results"

# Game counter; the result of game i is written to row i + 1, so the first
# result lands in row 2.
i = 1

from Q_Learning_Tic_Tac_Toe import Game, HumanPlayer, QPlayer, Board

board = Board()  # not referenced again in this fragment

# Load the stored Q object. pickle can execute arbitrary code while loading,
# so only open files that were produced by a trusted training run.
# The context manager closes the file handle as soon as loading is done.
with open("Q_epsilon_09_Nepisodes_20000.p", "rb") as q_file:
    Q = pickle.load(q_file)

root = tk.Tk()
player1 = QPlayer(mark="X", epsilon=0.01)
player2 = QPlayer(mark="O", epsilon=0.01)

game = Game(root, player1, player2, Q=Q)

N_episodes = 20000

for episodes in range(N_episodes):
    game.play()
    game.reset()

    # NOTE(review): the outcome is read *after* game.reset(); confirm that
    # reset() leaves game.board and game.current_player in their end-of-game
    # state, otherwise every game would be recorded as "no winner".
    if game.board.winner() is None:
        result = 0
    else:
        mark = game.current_player.mark
        if mark == "X":
            result = 1
        elif mark == "O":
            result = -1
        else:
            # Unknown mark: nothing is written for this game.
            result = None

    if result is not None:
        ws1.cell(row=i + 1, column=1, value=result)
    i += 1
import numpy as np
import tkinter as tk
import copy
from tqdm import tqdm

import pickle
from Q_Learning_Tic_Tac_Toe import Game, QPlayer  # Classes used for Tic Tac Toe

# Self-play training script: two QPlayers ("X" and "O") with the same
# epsilon play N_episodes games against each other; afterwards the Q object
# held by the game is pickled.
root = tk.Tk()
epsilon = 0.9  # passed to both QPlayers as `epsilon`
player1 = QPlayer(mark="X", epsilon=epsilon)
player2 = QPlayer(mark="O", epsilon=epsilon)
game = Game(root, player1, player2)

N_episodes = 200000
# tqdm wraps the range to show a progress bar over the episodes.
for episodes in tqdm(range(N_episodes)):
    game.play()
    game.reset(log=False)

Q = game.Q

filename = "Q_epsilon_09_Nepisodes_{}.p".format(N_episodes)
# Use a context manager so the file is flushed and closed deterministically
# instead of relying on garbage collection of an anonymous file object.
with open(filename, "wb") as q_file:
    pickle.dump(Q, q_file)