예제 #1
0
from Q_Learning_Tic_Tac_Toe import Game, HumanPlayer, QPlayer, Board

# Evaluation script: load a previously pickled Q object, let two QPlayer
# agents play N_episodes games against each other, and record one result
# value per game in column 1 of the worksheet `ws1`.
#
# NOTE(review): `pickle`, `tk`, `ws1`, `wb`, `i` and `dest_filename` are used
# below but are not defined in the visible part of this script -- confirm they
# are set up before this point.
board = Board()

# Only unpickle files you produced yourself: pickle can execute arbitrary code
# while loading. The `with` block closes the file handle deterministically.
with open("Q_epsilon_09_Nepisodes_20000.p", "rb") as q_file:
    Q = pickle.load(q_file)

root = tk.Tk()
player1 = QPlayer(mark="X", epsilon=0.01)
player2 = QPlayer(mark="O", epsilon=0.01)

game = Game(root, player1, player2, Q=Q)

N_episodes = 20000

for episodes in range(N_episodes):
    game.play()
    game.reset()

    # NOTE(review): the result is read *after* game.reset(). If reset() clears
    # the board, winner() would be None for every episode -- verify against
    # Game.reset before trusting the recorded values.
    if game.board.winner() is None:
        outcome = 0
    else:
        # Presumably current_player is the winner at this point -- TODO confirm.
        # Any mark other than "X"/"O" yields None and nothing is written.
        outcome = {"X": 1, "O": -1}.get(game.current_player.mark)

    if outcome is not None:
        # One row per episode: 0 when winner() is None, 1 for "X", -1 for "O".
        ws1.cell(row=i + 1, column=1, value=outcome)
    i += 1

#game.play()
#root.mainloop()

wb.save(filename=dest_filename)
import numpy as np
import tkinter as tk
import copy
from tqdm import tqdm

import pickle
from Q_Learning_Tic_Tac_Toe import Game, QPlayer  # Classes used for Tic Tac Toe

# Training script: run N_episodes rounds of play between two QPlayer agents
# that share one Game object, then pickle the resulting game.Q to disk.
root = tk.Tk()
epsilon = 0.9
player1 = QPlayer(mark="X", epsilon=epsilon)
player2 = QPlayer(mark="O", epsilon=epsilon)
game = Game(root, player1, player2)

N_episodes = 200000
for episodes in tqdm(range(N_episodes)):
    game.play()
    game.reset(log=False)

Q = game.Q

# NOTE(review): the "09" in the file name is hard-coded and does not follow
# `epsilon`. With N_episodes = 200000 this writes
# "Q_epsilon_09_Nepisodes_200000.p", while the evaluation script loads
# "Q_epsilon_09_Nepisodes_20000.p" -- confirm which file is intended.
filename = "Q_epsilon_09_Nepisodes_{}.p".format(N_episodes)

# The `with` block flushes and closes the file even if pickling fails midway.
with open(filename, "wb") as q_file:
    pickle.dump(Q, q_file)