def __init__(self, NetType='ResNet'):
    """Start a new game and build the network-backed move selector.

    Args:
        NetType: ``'ResNet'`` selects the ``'RES'`` network and its
            checkpoint directory; any other value selects the ``'CNN'``
            network and its checkpoint directory.
    """
    # The two network flavours differ only in the type tag handed to
    # ``nn``, the checkpoint directory and the MCTS ``cpuct`` constant.
    if NetType == 'ResNet':
        net_tag = 'RES'
        checkpoint_dir = '/home/zc1213/course/alphabackend/alphabrain/HistoryLog/Go/R_Ver2_checkpoint/{}/'
        cpuct = 21.3
    else:
        net_tag = 'CNN'
        checkpoint_dir = '/home/zc1213/course/alphabackend/alphabrain/HistoryLog/Go/C_checkpoint/{}/'
        cpuct = 17.3

    # Game state.
    self.game = game(BoardSize)
    self.board = self.game.getInitBoard()
    self.n = self.game.getBoardSize()[0]
    # Three slots with ``None`` in the middle.
    # NOTE(review): presumably indexed by ``curPlayer + 1`` so that -1 and
    # +1 map to the two ends -- confirm against the caller.
    self.players = [self.AlphaPlay, None, self.HumanPlay]
    self.curPlayer = 1
    self.gameStatus = 0

    # Network, search tree and greedy move picker.
    self.AlphaNet = nn(self.game, t=net_tag)
    self.AlphaNet.load_checkpoint(checkpoint_dir.format(BoardSize),
                                  'best.pth.tar')
    self.AlphaArgs = dotdict({'numMCTSSims': 2000, 'cpuct': cpuct})
    self.AlphaMCTS = MCTS(self.game, self.AlphaNet, self.AlphaArgs)
    # Picks the index of the largest entry returned by the search at temp=0.
    self.Alpha = lambda x: np.argmax(
        self.AlphaMCTS.getActionProb(x, temp=0))

    self.alphaMoveCache = {}
Esempio n. 2
0
def tour():
    """Pit stored checkpoints against each other and save the tallies as CSV.

    For every pairing that is played, both networks are loaded from their
    checkpoint files, wrapped in MCTS-backed players and matched for 10
    arena games.  The three values returned per pairing are collected into
    the columns ``1win``, ``2win`` and ``draw`` and written to
    ``reuslt.csv`` in the current working directory.

    Relies on the module-level names ``g`` (the game object) and
    ``display``.
    """
    pathAtt = './HistoryLog/Go/'
    # Candidate label -> [checkpoint folder, checkpoint file].
    Rcand = {
        'R1_10': [pathAtt + 'R_Ver1_checkpoint/7/', 'checkpoint_11.pth.tar'],
        'R1_40': [pathAtt + 'R_Ver1_checkpoint/7/', 'checkpoint_47.pth.tar'],
        'R1_B': [pathAtt + 'R_Ver1_checkpoint/7/', 'best.pth.tar'],
        'R2_B': [pathAtt + 'R_Ver2_checkpoint/7/', 'best.pth.tar'],
        'R3_B': [pathAtt + 'R_Ver3_checkpoint/7/', 'best.pth.tar'],
    }
    Ccand = {
        'C_10': [pathAtt + 'C_Ver1_checkpoint/7/', 'checkpoint_6.pth.tar'],
        'C_40': [pathAtt + 'C_Ver1_checkpoint/7/', 'checkpoint_40.pth.tar'],
        'C_B': [pathAtt + 'C_Ver1_checkpoint/7/', 'best.pth.tar'],
    }

    # Full schedule of (player 1, player 2) labels.
    compares = [('R1_10', 'C_10'), ('R1_40', 'C_40'), ('R1_B', 'C_B'),
                ('R2_B', 'C_B'), ('R3_B', 'C_B'), ('R1_B', 'R2_B'),
                ('R1_B', 'R3_B'), ('R2_B', 'R3_B')]

    def describe(label):
        """Return (network type, [folder, file]) for a candidate label."""
        # Labels starting with 'R' are residual nets; everything else is CNN.
        if label[0] == 'R':
            return 'RES', Rcand[label]
        return 'CNN', Ccand[label]

    res = []
    # NOTE(review): only the first pairing is played; iterate over
    # ``compares`` itself to run the whole schedule.
    for c in compares[:1]:
        print(c)
        p1type, p1checkpoint = describe(c[0])
        p2type, p2checkpoint = describe(c[1])

        print(p1type, p2type)
        print(p1checkpoint, p2checkpoint)

        Net1 = nn(g, t=p1type)
        Net1.load_checkpoint(p1checkpoint[0], p1checkpoint[1])
        Args1 = dotdict({'numMCTSSims': 3000, 'cpuct': 17.5})
        MCTS1 = MCTS(g, Net1, Args1)
        Player1 = lambda x: np.argmax(MCTS1.getActionProb(x, temp=0))

        Net2 = nn(g, t=p2type)
        Net2.load_checkpoint(p2checkpoint[0], p2checkpoint[1])
        # The network type is either 'RES' or 'CNN'.  Comparing against any
        # other tag would silently give a residual-net opponent the weaker
        # CNN search settings in the R-vs-R pairings.
        p2_is_res = p2type == 'RES'
        Args2 = dotdict({
            'numMCTSSims': 3000 if p2_is_res else 250,
            'cpuct': 17.5 if p2_is_res else 3.0,
        })
        MCTS2 = MCTS(g, Net2, Args2)
        Player2 = lambda x: np.argmax(MCTS2.getActionProb(x, temp=0))

        arena = Arena.Arena(Player1, Player2, g, display=display)
        res.append(arena.playGames(10, verbose=True))

    # One row per pairing; columns take the 1st, 2nd and 3rd returned value.
    result = {
        '1win': [r[0] for r in res],
        '2win': [r[1] for r in res],
        'draw': [r[2] for r in res],
    }
    # NOTE(review): 'reuslt.csv' looks like a typo for 'result.csv'; the
    # name is kept so anything that already reads this file keeps working.
    pd.DataFrame(data=result).to_csv('reuslt.csv')
Esempio n. 3
0
def main():
    """Build the game, network and coach, then start the learning loop.

    Reads the module-level ``args``: when ``args.load_model`` is truthy the
    checkpoint named by ``args.load_folder_file`` (folder, file) is loaded
    into the network and the coach's stored training examples are loaded
    before learning begins.
    """
    # Fixed seed for NumPy's global random generator.
    np.random.seed(123)

    log.info('Loading %s...', Game.__name__)
    g = Game(7)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        # The format string has two placeholders, so the folder and the file
        # name must be passed as two separate arguments; passing the tuple
        # itself makes the logging module report a formatting error.
        log.info('Loading checkpoint "%s/%s"...',
                 args.load_folder_file[0], args.load_folder_file[1])
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
Esempio n. 4
0
from go.pytorch.NNet import NNetWrapper as nn

# Settings handed to ``Coach`` below.  This script itself only reads
# ``load_model`` and ``load_folder_file`` (checkpoint folder, file name).
args = dotdict(dict(
    numIters=10,
    numEps=40,
    tempThreshold=15,
    updateThreshold=0.6,
    maxlenOfQueue=200000,
    numMCTSSims=10,
    arenaCompare=20,
    cpuct=1,
    checkpoint='./temp/',
    load_model=False,
    load_folder_file=('/dev/models/8x100x50', 'best.pth.tar'),
    numItersForTrainExamplesHistory=20,
))

if __name__ == "__main__":
    # Script entry point: build the game and network, optionally resume
    # from a saved checkpoint, then run the coach's learning loop.
    g = GoGame(3)
    nnet = nn(g)

    if args.load_model:
        # (folder, file name) of the checkpoint to resume from.
        checkpoint = args.load_folder_file
        nnet.load_checkpoint(checkpoint[0], checkpoint[1])

    c = Coach(g, nnet, args)

    if args.load_model:
        # Stored training examples can only be loaded once the coach exists.
        print("Load trainExamples from file")
        c.loadTrainExamples()

    c.learn()
    'maxlenOfQueue': 200000,
    'numMCTSSims': 200,
    'arenaCompare': 50,
    'cpuct': 3,

    'checkpoint': './HistoryLog/Go/{}_checkpoint/{}/'.format(NetType+'_'+tag,BoardSize),
    'load_model': True,
    'load_folder_file': ('./HistoryLog/Go/{}_checkpoint/{}/'.format(NetType+'_'+tag,BoardSize),'best.pth.tar'),
    'numItersForTrainExamplesHistory': 25,
    'display':NO_DIS #True to display board, False to display progress bar
})

if __name__ == "__main__":
    # Script entry point: build the game and the selected network, make sure
    # the log directory exists, optionally resume from a checkpoint, then
    # run the coach's learning loop with logging enabled.
    g = Game(BoardSize)
    nnet = nn(g, t='RES' if NetType == 'R' else 'CNN')
    logPath = './HistoryLog/Go/{}_Log/{}/'.format(NetType + '_' + tag, BoardSize)

    # An already existing directory is fine.  Any other filesystem problem
    # is reported rather than silently swallowed, and the run continues as
    # before.  A bare ``except`` here would also hide KeyboardInterrupt.
    try:
        os.makedirs(logPath, exist_ok=True)
    except OSError as err:
        print("Could not create log directory {}: {}".format(logPath, err))

    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])

    c = Coach(g, nnet, args, log=True, logPath=logPath)
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
Esempio n. 6
0
"""
Use this script to play any two agents against each other, or to play
manually with any agent.
"""
BoardSize = 7
g = game(BoardSize)

# Non-network players: each is the bound ``play`` method of its player object.
rp = RandomPlayer(g).play
gp = GreedyGoPlayer(g).play
hp = HumanGoPlayer(g).play

# Network-backed players.
NetType = 'CNN'

# Residual network: checkpoint, search settings and search tree.
ResNet = nn(g, t='RES')
ResNet.load_checkpoint(
    './HistoryLog/Go/R_Ver2_checkpoint/{}/'.format(BoardSize),
    'RVer2.best.pth.tar',
)
ResArgs = dotdict(dict(numMCTSSims=3000, cpuct=17.0))
ResMCTS = MCTS(g, ResNet, ResArgs)


def ResPlayer(x):
    """Return the argmax of the residual net's MCTS output at temp=0."""
    return np.argmax(ResMCTS.getActionProb(x, temp=0))


# Convolutional network: checkpoint, search settings and search tree.
CNN = nn(g, t='CNN')
CNN.load_checkpoint(
    './HistoryLog/Go/C_checkpoint/{}/'.format(BoardSize),
    'checkpoint_4.pth.tar',
)
CNNArgs = dotdict(dict(numMCTSSims=250, cpuct=3.0))
CNNMCTS = MCTS(g, CNN, CNNArgs)


def CNNPlayer(x):
    """Return the argmax of the CNN's MCTS output at temp=0."""
    return np.argmax(CNNMCTS.getActionProb(x, temp=0))


# Residual-net player goes first, CNN player second.
arena = Arena.Arena(ResPlayer, CNNPlayer, g, display=display)