from agents.minimax.minimax import RandomPlayer data_saver = Save_data() minimax = RandomPlayer(mark='X') q_player = DqnAgent(mark='O') q_table = Save_data().load_data(path='q_player_QS.p') if q_table is not None: q_player.Q = q_table print("The qlearn agent has loaded the qtable") EPISODES = 100_000 sys.setrecursionlimit(10000) game = NtxuvaGame(master=tk.Tk(), player1=minimax, player2=q_player) statistical_data = {} statistical_data_minimax = {} minimax_win = 0 q_player_win = 0 for episode in range(EPISODES): print(f"EPISODE --- {episode} --- ") try: winner = game.play() # statistics for q_player statistical_data[episode] = [ f"bad-moves: {game.player2.bad_moves}",
# Imports: game environment, persistence helper, timer and the two agents.
from game.ntxuva_ttt_game import NtxuvaGame
from extra_tools.save_data import Save_data
from timeit import default_timer as timer
from agents.qplayerandothers.players import QPlayer
from agents.dqn.dqn import DqnAgent

# Timestamp taken before any setup work; paired with `end` below.
start = timer()

EPISODES = 10
data_saver = Save_data(filename='dqn_vs_qlearn')

# The DQN agent plays 'X' and the Q-learning agent plays 'O'; both are
# constructed with rating=1000.
game = NtxuvaGame(
    player1=DqnAgent(mark='X', rating=1000),
    player2=QPlayer(mark='O', rating=1000),
)

for episode in range(EPISODES):
    print(f" -- {episode} -- ")
    winner = game.play()
    # play() may hand back None (presumably a draw -- confirm); only a real
    # winner gets its win counter bumped.
    if winner is not None:
        winner.wins += 1
    game.reset()

# NOTE(review): the original indentation was lost; saving once after the
# loop (rather than every episode) is assumed -- confirm.
data_saver.save(players={1: game.player1, 2: game.player2})
end = timer()
# Number of episodes to run.
EPISODES = 100_000

# Players: the MinimaxQ agent plays 'O' (16 actions for each side) and the
# DQN agent plays 'X'.
minimax_q = MinimaxQPlayer(mark='O', numActionsA=16, numActionsB=16)
dqn_agent = DqnAgent(mark='X')

# Module-level aliases for the MinimaxQ agent's Q, V and pi attributes.
Q, V, pi = minimax_q.Q, minimax_q.V, minimax_q.pi

# Game environment: DQN moves as player 1, MinimaxQ as player 2.
game = NtxuvaGame(player1=dqn_agent, player2=minimax_q)
sys.setrecursionlimit(10000)
FILENAME = "dqn_vs_minimaxq.p"

# Statistics collected during the run: per-episode data and win counters.
statistical_data = {}
dqn_win = minimax_q_win = 0

# Helper object used to persist the statistical data.
data_saver = Save_data()

start = timer()

# Episode counter: training runs from episode 0 up to the last episode.
episode = 0
from extra_tools.save_data import Save_data
from timeit import default_timer as timer

# Number of episodes to run.
EPISODES = 5_000

# Players: the MinimaxQ agent plays 'O' (16 actions for each side); the 'X'
# side is a RandomPlayer built with prof=1, bound to the name `minimax`.
minimax_q = MinimaxQPlayer(mark='O', numActionsA=16, numActionsB=16)
minimax = RandomPlayer(mark='X', prof=1)

# Module-level aliases for the MinimaxQ agent's Q, V and pi attributes.
Q, V, pi = minimax_q.Q, minimax_q.V, minimax_q.pi

# Game environment: `minimax` moves as player 1, MinimaxQ as player 2.
game = NtxuvaGame(player1=minimax, player2=minimax_q)
sys.setrecursionlimit(10000)
FILENAME = "minimax_vs_minimaxq"

# Statistics collected during the run: per-episode data and win counters.
statistical_data = {}
minimax_win = minimax_q_win = 0

# Helper object used to persist the statistical data.
data_saver = Save_data()

start = timer()

# Episode counter: training runs from episode 0 up to the last episode.
episode = 0
import tkinter as tk
import sys
# cPickle is for Python 2.x only; in Python 3, simply "import pickle" and the
# accelerated version will be used automatically if available.
import pickle

from game.ntxuva_ttt_game import NtxuvaGame, DqnAgent

tk.wantobjects = False
root = tk.Tk()

# Both agents are constructed with the same epsilon value.
epsilon = 0.9
player1 = DqnAgent(mark="X", epsilon=epsilon)
player2 = DqnAgent(mark="O", epsilon=epsilon)
game = NtxuvaGame(root, player1, player2)

# Raise the interpreter's recursion limit before playing.
sys.setrecursionlimit(10000)

N_episodes = 45000
for episodes in range(N_episodes):
    print('Episode ', episodes)
    try:
        game.play()
    except (RecursionError, MemoryError):
        # Either failure gets the same treatment: dump the game's Q
        # attribute to a per-episode checkpoint file, then carry on.
        Q = game.Q
        filename = "Q_table_ntxuva_dictionary_%s.p" % episodes
        # Close the file deterministically so the checkpoint is flushed to
        # disk even though the process is already under memory/stack pressure.
        with open(filename, "wb") as checkpoint_file:
            pickle.dump(Q, checkpoint_file)
    # NOTE(review): the original indentation was lost; reset() is assumed to
    # run once per episode at loop level -- confirm.
    game.reset()
# Imports: timer, game environment, persistence/Elo helpers and the agents.
from timeit import default_timer as timer
from game.ntxuva_ttt_game import NtxuvaGame
from extra_tools.save_data import Save_data
from extra_tools.elo_rating import EloRater
from agents.qplayerandothers.players import QPlayer, RandomPlayer

# Timestamp taken before any setup work.
start = timer()

EPISODES = 100_000

# The random player plays 'X' and the Q-learning player plays 'O'.
game = NtxuvaGame(
    player1=RandomPlayer(mark='X'),
    player2=QPlayer(mark='O'),
)

# The saver receives both player objects; its filename embeds the episode
# count.
data_saver = Save_data(
    player1=game.player1,
    player2=game.player2,
    filename=f'qlearn_vs_random_{EPISODES}',
)

# Look up each player's Elo value through the saver, keyed by player name.
p1_rating = data_saver.get_elo(game.player1.player_name)
p2_rating = data_saver.get_elo(game.player2.player_name)
elo = EloRater(game.player1, game.player2, K=1)

for episode in range(EPISODES):
    print(f" -- {episode} -- ")
    winner = game.play()
    # play() may hand back None (presumably a draw -- confirm); only a real
    # winner gets its win counter bumped.
    if winner is not None:
        winner.wins += 1