Beispiel #1
0
from agents.minimax.minimax import RandomPlayer

# Helper object for persisting data.
data_saver = Save_data()

# Players: note that `minimax` is actually bound to a RandomPlayer ('X'),
# while `q_player` is a DqnAgent ('O').
minimax = RandomPlayer(mark='X')
q_player = DqnAgent(mark='O')

# Try to restore a previously stored table from 'q_player_QS.p'; the loader
# result is only installed on the agent when it is not None.
q_table = Save_data().load_data(path='q_player_QS.p')
if q_table is not None:
    q_player.Q = q_table
    print("The qlearn agent has loaded the qtable")

# Number of episodes to play.
EPISODES = 100_000
sys.setrecursionlimit(10_000)

# Game environment, driven through a freshly created Tk root window.
game = NtxuvaGame(master=tk.Tk(), player1=minimax, player2=q_player)

# Per-episode statistics, keyed by episode number.
statistical_data = {}
statistical_data_minimax = {}

# Win counters for both players.
minimax_win = q_player_win = 0

for episode in range(EPISODES):
    print(f"EPISODE --- {episode} --- ")
    try:

        winner = game.play()
        # statistics for q_player
        statistical_data[episode] = [
            f"bad-moves: {game.player2.bad_moves}",
Beispiel #2
0
# Important classes
from game.ntxuva_ttt_game import NtxuvaGame
from extra_tools.save_data import Save_data
from timeit import default_timer as timer
from agents.qplayerandothers.players import QPlayer
from agents.dqn.dqn import DqnAgent

# Timestamp taken before any setup work; `end` below closes the interval.
start = timer()

# Number of games to play.
EPISODES = 10

data_saver = Save_data(filename='dqn_vs_qlearn')

# DQN agent plays 'X', Q-learning agent plays 'O'; both are given rating 1000.
game = NtxuvaGame(
    player1=DqnAgent(mark='X', rating=1000),
    player2=QPlayer(mark='O', rating=1000),
)

for episode in range(EPISODES):
    print(f" -- {episode} -- ")

    # play() yields the winning player object, or None when nobody won.
    winner = game.play()
    if winner is not None:
        winner.wins += 1

    # Prepare the environment for the next episode.
    game.reset()

# Persist both players, keyed by their player number.
data_saver.save(players={1: game.player1, 2: game.player2})
end = timer()
Beispiel #3
0
# Number of episodes to run
EPISODES = 100_000

# Players: a minimax-Q agent on 'O' (numActionsA = numActionsB = 16)
# against a DQN agent on 'X'
minimax_q = MinimaxQPlayer(mark='O', numActionsA=16, numActionsB=16)
dqn_agent = DqnAgent(mark='X')

# Module-level aliases for the minimax-Q agent's Q, V and pi attributes
Q, V, pi = minimax_q.Q, minimax_q.V, minimax_q.pi

# Game environment: the DQN agent is player 1, the minimax-Q agent player 2
game = NtxuvaGame(player1=dqn_agent, player2=minimax_q)

sys.setrecursionlimit(10_000)
FILENAME = "dqn_vs_minimaxq.p"

# Statistics container and win counters
statistical_data = {}
dqn_win = minimax_q_win = 0

# Object used to persist the statistical data
data_saver = Save_data()

# Start the timer; the episode counter begins at 0
start = timer()
episode = 0
Beispiel #4
0
from extra_tools.save_data import Save_data
from timeit import default_timer as timer

# Number of episodes to run
EPISODES = 5_000

# Players: a minimax-Q agent on 'O' (numActionsA = numActionsB = 16).
# NOTE: `minimax` is bound to a RandomPlayer ('X', prof=1), not a minimax agent.
minimax_q = MinimaxQPlayer(mark='O', numActionsA=16, numActionsB=16)
minimax = RandomPlayer(mark='X', prof=1)

# Module-level aliases for the minimax-Q agent's Q, V and pi attributes
Q, V, pi = minimax_q.Q, minimax_q.V, minimax_q.pi

# Game environment: the random player is player 1, the minimax-Q agent player 2
game = NtxuvaGame(player1=minimax, player2=minimax_q)

sys.setrecursionlimit(10_000)
FILENAME = "minimax_vs_minimaxq"

# Statistics container and win counters
statistical_data = {}
minimax_win = minimax_q_win = 0

# Object used to persist the statistical data
data_saver = Save_data()

# Start the timer; the episode counter begins at 0
start = timer()
episode = 0
Beispiel #5
0
import tkinter as tk
import sys
import pickle as pickle  # cPickle is for Python 2.x only; in Python 3, simply "import pickle" and the accelerated version will be used automatically if available
from game.ntxuva_ttt_game import NtxuvaGame, DqnAgent

# Self-play script: two DqnAgent instances ('X' and 'O') play N_episodes games
# in a NtxuvaGame. When an episode aborts with RecursionError or MemoryError,
# the game's Q attribute is pickled to a per-episode file and the run goes on.
tk.wantobjects = False
root = tk.Tk()

# Both agents are constructed with the same epsilon value.
epsilon = 0.9
player1 = DqnAgent(mark="X", epsilon=epsilon)
player2 = DqnAgent(mark="O", epsilon=epsilon)
game = NtxuvaGame(root, player1, player2)

sys.setrecursionlimit(10000)

N_episodes = 45000

for episodes in range(N_episodes):
    print('Episode ', episodes)

    try:
        game.play()
    except (RecursionError, MemoryError):
        # Both failure modes are handled identically: snapshot game.Q to disk.
        Q = game.Q
        filename = "Q_table_ntxuva_dictionary_%s.p" % episodes
        # The context manager guarantees the file is flushed and closed even
        # if pickling itself fails; the bare open() used before leaked the
        # handle on every dump.
        with open(filename, "wb") as q_file:
            pickle.dump(Q, q_file)
    # Reset after every episode, whether it finished normally or was dumped.
    game.reset()
Beispiel #6
0
# Important classes
from timeit import default_timer as timer
from game.ntxuva_ttt_game import NtxuvaGame
from extra_tools.save_data import Save_data
from extra_tools.elo_rating import EloRater
from agents.qplayerandothers.players import QPlayer, RandomPlayer

# Timestamp taken before any setup work.
start = timer()

# Number of games to play; also embedded in the saver's filename below.
EPISODES = 100_000

# Random player on 'X' against the Q-learning player on 'O'.
game = NtxuvaGame(
    player1=RandomPlayer(mark='X'),
    player2=QPlayer(mark='O'),
)

data_saver = Save_data(
    player1=game.player1,
    player2=game.player2,
    filename=f'qlearn_vs_random_{EPISODES}',
)

# Look up each player's Elo value by player name.
p1_rating, p2_rating = (
    data_saver.get_elo(game.player1.player_name),
    data_saver.get_elo(game.player2.player_name),
)

# Elo rater over the two players, constructed with K=1.
elo = EloRater(game.player1, game.player2, K=1)

for episode in range(EPISODES):

    print(f" -- {episode} -- ")

    winner = game.play()

    if winner is not None:
        winner.wins = winner.wins + 1