Example #1
0
def main():
    """Wire up the game, network and Coach, then start self-play training."""
    log.info('Loading %s...', TicTacToeGame.__name__)
    game = TicTacToeGame()

    log.info('Loading %s...', nn.__name__)
    net = nn(game)

    if not args.load_model:
        log.warning('Not loading a checkpoint!')
    else:
        log.info(f'Loading checkpoint "{args.load_folder_file}" ...')
        folder, filename = args.load_folder_file
        net.load_checkpoint(folder, filename)

    log.info('Loading the Coach...')
    coach = Coach(game, net, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        coach.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    coach.learn()
Example #2
0
import Arena
from MCTS import MCTS
from tictactoe.TicTacToeGame import TicTacToeGame, display
from tictactoe.TicTacToePlayers import *
from tictactoe.keras.NNet import NNetWrapper as NNet

import numpy as np
from utils import *

"""
use this script to play any two agents against each other, or play manually with
any agent.
"""

# Shared 3x3 TicTacToe game instance used by every player below.
g = TicTacToeGame(3)

# all players
rp = RandomPlayer(g).play
# gp = TicTacToePlayer(g).play
hp = HumanTicTacToePlayer(g).play

# nnet players
n1 = NNet(g)
n1.load_checkpoint('./pretrained_models/tictactoe/keras/','best-25eps-25sim-10epch.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct':1.0})
mcts1 = MCTS(g, n1, args1)
# Greedy policy: with temp=0 getActionProb is (near-)one-hot, so argmax picks the chosen move.
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))


# Second network/agent for pitting.
# NOTE(review): checkpoint folder '/dev/8x50x25/' looks unusual -- confirm the path.
n2 = NNet(g)
n2.load_checkpoint('/dev/8x50x25/','best.pth.tar')
 def test_tictactoe_keras(self):
     """Smoke-test TicTacToe with the Keras net via the shared game-test harness."""
     self.execute_game_test(TicTacToeGame(), TicTacToeKerasNNet)
from utils import *
from time import time
from joblib import Parallel, delayed
import multiprocessing


def experiment(m):
    """Evaluate the MCS agent against a random player at time budget `m`.

    Plays 5 repetitions of 100 games each on the module-level game `g` and
    returns a list of [m, rep, wins, losses, draws] rows, one per repetition.

    NOTE(review): depends on module-level `g`, `RandomPlayer`, `MCSAgent`,
    `Arena` and `display` defined elsewhere in the file.
    """
    # Bug fix: accumulate into a local list instead of the module-level
    # `data` global.  The global version raised NameError / accumulated
    # stale rows depending on how joblib's worker processes imported the
    # module; a local accumulator is correct in both sequential and
    # parallel execution and matches how the caller consumes the result.
    results = []
    for rep in range(5):
        rp = RandomPlayer(g).play
        # nSims is effectively unbounded; the per-move `time` budget (m,
        # microseconds) is what actually limits the search.
        mcs = MCSAgent(g, nSims=100000000, time=m).play
        arena_rp_hp = Arena.Arena(mcs, rp, g, display=display)
        wins, loss, draw = arena_rp_hp.playGames(100, verbose=False)
        results.append([m, rep, wins, loss, draw])
    return results


print('Start Parallel')
global_start = time()
# Per-move time budgets for the MCS agent, in microseconds.
microsecs = np.array([
    5000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000, 1500000,
    2000000, 3000000
])
games = [3, 4, 5]  # board sizes to sweep
for i in games:
    global_start = time()  # restart the timer for this board size
    g = TicTacToeGame(i)
    data = []
    # One joblib worker per time budget (11 budgets, n_jobs=11); each
    # experiment(m) call returns its own result rows.
    data = Parallel(n_jobs=11)(delayed(experiment)(m) for m in microsecs)
    np.save('tictactoe_results_' + str(i), data)
    print('Game: ' + str(i) + ' Time: ' + str(time() - global_start))
Example #5
0
from tictactoe.TicTacToePlayers import *
from tictactoe.TicTacToeGame import TicTacToeGame,display
from tictactoe.TicTacToeLogic import Board
from tictactoe.keras.NNet import NNetWrapper as NNet
#from othello.pytorch.NNet import NNetWrapper as NNet

import numpy as np
from utils import *

"""
use this script to play any two agents against each other, or play manually with
any agent.
"""

#g = OthelloGame(6)
# Board size comes from the logic module so the game and board stay in sync.
g = TicTacToeGame(Board.SIZE)

# all players
rp = RandomPlayer(g).play
#gp = GreedyOthelloPlayer(g).play
#hp = HumanOthelloPlayer(g).play
hp = HumanTicTacToePlayer(g).play

# nnet players
n1 = NNet(g)
#n1.load_checkpoint('./pretrained_models/othello/pytorch/','6x100x25_best.pth.tar')
#n1.load_checkpoint('./pretrained_models/tictactoe/keras','best-25eps-25sim-10epch.pth.tar')
n1.load_checkpoint('./temp/4x4/','best.pth.tar')

# 50 MCTS simulations per move, exploration constant cpuct=1.0.
args1 = dotdict({'numMCTSSims': 50, 'cpuct':1.0})
# NOTE(review): MCTS is not imported in this snippet -- presumably
# `from MCTS import MCTS` appears elsewhere in the full file; confirm.
mcts1 = MCTS(g, n1, args1)
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 40,
    'cpuct': 1,
}

# Checkpoint/load settings.  Directory names are derived from the model
# hyper-parameters via args_to_filename so runs with different parameter
# sets do not overwrite each other.
loadSaveArgs = {'checkpoint': './checkpoints/tictactoe/keras/' + args_to_filename(model_params),
                'load_model': False,
                'load_folder_file': ('models/tictactoe/keras/' + args_to_filename(model_params), 'best.pth.tar'),
                'numItersForTrainExamplesHistory': 10}

# Merge training and load/save settings into one attribute-accessible config.
args = dotdict({**model_params,**loadSaveArgs})

if __name__ == "__main__":

    game = TicTacToeGame(3)
    nnet = keras_tictactoe_neuralnet(game)
    c = Coach(game, nnet, args)

    pathlib.Path(args.checkpoint).mkdir(parents=True, exist_ok=True)
    if args.load_model:
        pathlib.Path(args.load_folder_file[0]).mkdir(parents=True, exist_ok=True)

        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
Example #7
0
import sys
import numpy as np
from utils import *

# Player ids used by the front-end protocol below.
# NOTE(review): presumably these match the game engine's +1/-1 player
# convention -- confirm against TicTacToeGame.
humanPlayer = -1
computerPlayer = 1
"""
use this script as the backend to the tictactoeaz.py front-end.
"""


def eprint(*values, **opts):
    """Print to stderr instead of stdout; forwards print()'s keyword options."""
    print(*values, file=sys.stderr, **opts)


game = TicTacToeGame()

# nnet players - The Computer Player
n1 = NNet(game)
n1.load_checkpoint('./pretrained_models/tictactoe/keras/',
                   'best-25eps-25sim-10epch.pth.tar')
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(game, n1, args1)
# Deterministic policy: temp=0 makes getActionProb (near-)one-hot, so argmax selects the move.
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
# Fresh empty board for the interactive session.
board = game.getInitBoard()
while True:
    eprint("Enter msg Id:")
    msgId = int(input())

    if msgId == 0:
Example #8
0
from othello.tensorflow.NNet import NNetWrapper as NNet2

from connect4.Connect4Game import Connect4Game, display as display3
from connect4.Connect4Players import *
from connect4.tensorflow.NNet import NNetWrapper as NNet3

import numpy as np
from utils import *
"""
use this script to play any two agents against each other, or play manually with
any agent.
"""
choice = "othello"

if choice == "tictactoe":
    g = TicTacToeGame(5)
    n1 = NNet(g)
    n1.load_checkpoint('./temp/', 'best75_eps95_dim5.pth.tar')
    display = display
    hp = MinMaxTicTacToePlayer(g, 4).play
if choice == "gobang":
    g = GobangGame(6, 6)
    n1 = NNet1(g)
    n1.load_checkpoint('./temp/', 'temp:iter75:eps5:dim6.pth.tar')
    display = display1
    hp = MinMaxGobangPlayer(g, 6).play
if choice == "othello":
    g = OthelloGame(6)
    n1 = NNet2(g)
    n1.load_checkpoint('./temp/', 'best75:eps140:dim6.pth.tar')
    display = display2
Example #9
0
def experiment(game):
    """Q-learning sweep for a TicTacToe board of size `game` (3, 4 or 5).

    For every (learning-rate, epsilon-schedule) combination in the global
    `lrs` x `epsilon_config` grid, trains a QAgent in slices of `ep_step`
    episodes, evaluating each slice in the arena against a random player,
    then saves the win-rate curves and timing with np.save.

    NOTE(review): depends on module-level globals defined elsewhere in the
    file: n_episodes, lrs, epsilon_config, reps, n_games, display, Arena,
    QAgent, RandomPlayer.
    """
    np.random.seed(556)  # fixed seed so runs are reproducible
    g = TicTacToeGame(game)
    # Episode budget and evaluation step depend on the board size.
    # ep_range holds the episode milestones at which training pauses for
    # arena evaluation: [0, step+1, 2*step+1, ...] (first entry forced to 0).
    if game == 3:
        total_episodes = n_episodes[0]
        ep_step = 10000
        ep_range = np.arange(0, total_episodes + ep_step, ep_step) + 1
        ep_range[0] = 0
        ep_range = ep_range.astype(int)
    elif game == 4:
        total_episodes = n_episodes[1]
        ep_step = 20000
        ep_range = np.arange(0, total_episodes + ep_step, ep_step) + 1
        ep_range[0] = 0
        ep_range = ep_range.astype(int)
    else:
        total_episodes = n_episodes[2]
        ep_step = 28000
        ep_range = np.arange(0, total_episodes + ep_step, ep_step) + 1
        ep_range[0] = 0
        ep_range = ep_range.astype(int)
    for lr in lrs:
        for i in epsilon_config:
            print('Config: Game', game, 'lr', lr, 'epsilon', i)
            test_wr_list = []  # per-repetition win rates, one sub-list per slice
            test_wr = []       # mean win rate per slice
            # 'f' = fixed exploration (epsilon 0.2, no decay); anything else
            # starts at epsilon 1 and decays with factor 0.99 per episode.
            if i == 'f':
                q_agent = QAgent(g,
                                 episodes=total_episodes,
                                 lr=lr,
                                 epsilon=0.2,
                                 dc=1,
                                 e_min=0.001,
                                 ep_arena=ep_step)
                rp = RandomPlayer(g).play
                q_agent_play = q_agent.play
            else:
                q_agent = QAgent(g,
                                 episodes=total_episodes,
                                 lr=lr,
                                 epsilon=1,
                                 dc=0.99,
                                 e_min=0.001,
                                 ep_arena=ep_step)
                rp = RandomPlayer(g).play
                q_agent_play = q_agent.play
            start = time()
            # Train slice by slice; each iteration trains up to the next
            # milestone, then measures arena performance against random play.
            for idx, episode in enumerate(ep_range):
                if episode == ep_range[-1]:
                    break
                if episode == 0:
                    print('Training for Episodes ',
                          0,
                          ' to ',
                          ep_range[idx + 1] - 1,
                          '...',
                          sep='')
                elif episode == ep_range[-2]:
                    print('Training for Episodes ',
                          episode - 1,
                          ' to ',
                          total_episodes,
                          '...',
                          sep='')
                else:
                    print('Training for Episodes ',
                          episode - 1,
                          ' to ',
                          ep_range[idx + 1] - 1,
                          '...',
                          sep='')
                q_agent.train(cur_episode=episode)
                print('Training Finished.')
                print('Playing in Arena...')
                wins = 0
                temp = []
                # Evaluate over `reps` repetitions of `n_games` games each.
                for repet in range(reps):
                    arena_rp_op = Arena.Arena(q_agent_play,
                                              rp,
                                              g,
                                              display=display)
                    w, _, _ = arena_rp_op.playGames(n_games, verbose=False)
                    temp.append(w / n_games)
                    wins += w
                test_wr_list.append(temp)
                test_wr.append(wins / (reps * n_games))
                print('\n')
            end = time()
            training_time = np.array([end - start])
            # Persist all curves, keyed by board size / lr / epsilon config.
            np.save(
                'train_wr_tictactoe_' + str(game) + '_' + str(lr) + '_' +
                str(i), q_agent.total_wins)
            np.save(
                'train_ep_tictactoe_' + str(game) + '_' + str(lr) + '_' +
                str(i), q_agent.total_eps)
            np.save(
                'test_wr_tictactoe_' + str(game) + '_' + str(lr) + '_' +
                str(i), test_wr)
            np.save(
                'test_wr_list_tictactoe_' + str(game) + '_' + str(lr) + '_' +
                str(i), test_wr_list)
            np.save(
                'training_time_' + str(game) + '_' + str(lr) + '_' + str(i),
                training_time)
            print('\n')