Exemple #1
0
def main():
    """Build the game, network and Coach, then run training under a wandb run.

    When ``args.load_model`` is set, the network checkpoint named by
    ``args.load_folder_file`` (a ``(folder, filename)`` pair) is loaded before
    the Coach is created, and the Coach's training examples are loaded
    afterwards.
    """
    log.info('Loading %s...', Game.__name__)
    g = Game(8)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        # The format string has two placeholders, so folder and filename must
        # be passed as two separate arguments; handing logging the tuple as a
        # single argument makes the %-formatting fail and the message is lost.
        log.info('Loading checkpoint "%s/%s"...',
                 args.load_folder_file[0], args.load_folder_file[1])
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    run = wandb.init(project=args.wandb_project, config=args, reinit=True)

    log.info('Starting the learning process 🎉')
    c.learn()
    run.finish()
def learn(q):
    """Run the self-play loop once and put the collected examples on ``q``.

    A ``Game(8)`` and its ``onnet`` wrapper are built, a single iteration with
    a single episode is played through a fresh ``MCTS``, and the resulting
    bounded deque of examples is handed to ``q.put``.

    Args:
        q: Object exposing ``put``; receives the deque of examples.
    """
    game = Game(8)
    nnet = onnet(game)
    for iteration in range(1, 2):
        print('------ITER ' + str(iteration) + '------')
        examples = deque([], maxlen=200000)
        for _ in range(1):
            search = MCTS(game, nnet)
            examples.extend(executeEpisode(game, search))
            print('1')
    q.put(examples)
Exemple #3
0
def main():
    """Build the game, network and Coach, then start the learning process.

    When ``args.load_model`` is set, the network checkpoint named by
    ``args.load_folder_file`` (a ``(folder, filename)`` pair) is loaded before
    the Coach is created, and the Coach's training examples are loaded
    afterwards.
    """
    log.info('Loading %s...', Game.__name__)
    g = Game(6)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        # The format string has two placeholders, so folder and filename must
        # be passed as two separate arguments; handing logging the tuple as a
        # single argument makes the %-formatting fail and the message is lost.
        log.info('Loading checkpoint "%s/%s"...',
                 args.load_folder_file[0], args.load_folder_file[1])
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
def collect_data(q):
    """Play self-play episodes and hand the collected examples to ``q``.

    ``int(numEps / 2)`` episodes are played, each with a newly built ``MCTS``.
    Whatever ``executeEpisode`` returns is appended to a bounded deque, which
    is put on ``q`` after the last episode. A progress bar is updated after
    every episode.

    Args:
        q: Object exposing ``put``; receives the deque of examples.
    """
    args = dotdict({
        'numIters': 11,
        'numEps': 50,
        'tempThreshold': 15,
        'updateThreshold': 0.5,
        'maxlenOfQueue': 200000,
        'numMCTSSims': 100,
        'arenaCompare': 2,
        'cpuct': 1,
        'checkpoint': './temp/',
        'load_model': True,
        'load_folder_file': ('./temp/', 'best.pth.tar'),
        'numItersForTrainExamplesHistory': 20,
    })
    game = Game(8)
    nnet = nn(game)
    examples = deque([], maxlen=args.maxlenOfQueue)

    eps_time = AverageMeter()
    bar = Bar('Self Play', max=args.numEps)
    end = time.time()

    suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'
    episode_count = int(args.numEps / 2)
    for eps in range(episode_count):
        print(eps)
        # A new MCTS per episode resets the search tree.
        tree = MCTS(game, nnet, args)
        examples.extend(executeEpisode(tree, game))

        # Bookkeeping and progress display.
        eps_time.update(time.time() - end)
        end = time.time()
        bar.suffix = suffix_template.format(
            eps=eps + 1,
            maxeps=args.numEps,
            et=eps_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
        )
        bar.next()

    q.put(examples)
    bar.finish()
Exemple #5
0
    50,  # Number of games to play during arena play to determine if new net will be accepted.
    'cpuct': 1,
    'checkpoint': checkpoint,
    'load_model': True,
    'load_folder_file': (checkpoint, 'checkpoint.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})

# Configuration dict passed to nimGame() when gameChoice == 2.
nimConfig = {'maxPileSize': 10, 'maxNumPile': 3, 'initialState': None}

if __name__ == "__main__":

    print("Serial Flag: " + str(serialFlag))
    if serialFlag:
        # Select the game implementation from gameChoice.
        # NOTE(review): there is no else branch, so any gameChoice other than
        # 0, 1 or 2 leaves `g` unbound and nn(g) below fails with NameError.
        if gameChoice == 0:
            g = Game(6)
        elif gameChoice == 1:
            g = TicTacToeGame()
        elif gameChoice == 2:

            g = nimGame(nimConfig)

        nnet = nn(g)

        # Load the checkpoint named by args.load_folder_file
        # (index 0 and index 1 are passed as two separate arguments).
        if args.load_model:
            nnet.load_checkpoint(args.load_folder_file[0],
                                 args.load_folder_file[1])

        c = Coach(g, nnet, args)
        # NOTE(review): this snippet ends right after the print; the matching
        # load call on `c` is not visible here.
        if args.load_model:
            print("Load trainExamples from file")
Exemple #6
0
                    next_state, next_player = game.getNextState(state, 1, action)
                    next_state = game.getCanonicalForm(next_state, -1)
                    node = str(next_state.reshape(-1) + 1)[1:-1]
                    cannonical_states[vertex][action]['winner'] = game.getGameEnded(next_state, 1) # * np.abs(next_state.sum())
                    cannonical_states[vertex][action]['next_node'] = node
                    if node not in seen:
                        seen[node] = 1
                        # seen.add(node)
                        queue.append(node)
            else:
                for action, data in cannonical_states[vertex].items():
                    node = data['next_node']
                    if node not in seen:
                        seen[node] = 1
                        # seen.add(node)
                        queue.append(node)
                
        N = len(cannonical_states)
        if N%10000 == 0:
            print('\rstates: {}'.format(N), end='')
            cannonical_states.sync()
    print()
    cannonical_states.close()
    return


# Driver: build a Game with n = 6, take its initial board and run
# bfs_cannonical on it as first player 1.
n = 6
game = Game(n)
board = game.getInitBoard()

# './data/6by6' is presumably the path of the state store that
# bfs_cannonical syncs and closes — confirm against its definition.
bfs_cannonical(game, board, './data/6by6', first_player=1)
    return trainExamplesHistory


if __name__ == '__main__':
    # Settings for this run; passed on to loadTrainExamples() and learn().
    args = dotdict({
        'numIters': 11,
        'numEps': 50,
        'tempThreshold': 15,
        'updateThreshold': 0.5,
        'maxlenOfQueue': 200000,
        'numMCTSSims': 100,
        'arenaCompare': 2,
        'cpuct': 1,
        'checkpoint': './temp/',
        'load_model': True,
        'load_folder_file': ('./temp/', 'best.pth.tar'),
        'numItersForTrainExamplesHistory': 20,
    })

    g = Game(8)
    nnet = nn(g)

    if args.load_model:
        # Resume: load the checkpoint first, then the stored example history.
        nnet.load_checkpoint(*args.load_folder_file)
        print("Load trainExamples from file")
        trainExamplesHistory = loadTrainExamples(args)
    else:
        # Fresh start: no previous examples.
        trainExamplesHistory = []

    learn(args, nnet, g, trainExamplesHistory)
Exemple #8
0
# Run configuration; read with attribute access below and passed to Coach.
args = dotdict({
    'numIters': 1000,
    'numEps': 100,
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 40,
    'cpuct': 1,
    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})

if __name__ == "__main__":
    g = Game()
    nnet = nn(g)

    # Load the (folder, filename) checkpoint before the Coach is built.
    if args.load_model:
        nnet.load_checkpoint(*args.load_folder_file)

    c = Coach(g, nnet, args)

    # The stored training examples are loaded through the Coach itself.
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()

    c.learn()
Exemple #9
0
from Coach import Coach
from othello.OthelloGame import OthelloGame as Game
from othello.pytorch.NNet import NNetWrapper as nn
from utils import *

# Run configuration; read with attribute access below and passed to Coach.
args = dotdict({
    'numIters': 1000,
    'numEps': 100,
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 40,
    'cpuct': 1,
    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
})

if __name__ == "__main__":
    # Game environment and the network wrapper built on top of it.
    g = Game(6)
    nnet = nn(g)

    # Load the (folder, filename) checkpoint when one is requested.
    if args.load_model:
        nnet.load_checkpoint(*args.load_folder_file)

    # Hand game, network and settings to the Coach, then train.
    c = Coach(g, nnet, args)
    c.learn()
Exemple #10
0
    'numIters': 1,  #original value: 1000
    'numEps': 2,  #orginal value: 100
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 40,
    'cpuct': 1,
    'checkpoint': './temp/',
    'load_model': False,
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})

if __name__ == "__main__":
    g = Game(6)  # construct the game object
    nnet = nn(g)  # build the network object for this game (presumably an NNetWrapper — confirm against the imports)
    # Visual separator in the console output.
    print(
        '----------------------********************-----------------------*********************-----------------'
    )
    # Load the checkpoint named by args.load_folder_file (folder, filename).
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0],
                             args.load_folder_file[1])

    print('main.py==> ', 'args: ', args)
    c = Coach(
        g, nnet, args
    )  # Coach is built from the game object, the network and the argument values
    # The stored training examples are loaded through the Coach itself.
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
        iterationTrainExamples = deque([], maxlen=200000)
        for eps in range(1):
            mcts = MCTS(game, nnet)
            iterationTrainExamples += executeEpisode(game, mcts)
            print('1')
    q.put(iterationTrainExamples)


if __name__ == '__main__':
    # Manager-backed queue shared with the worker processes.
    q = Manager().Queue()

    # Two workers run learn(q) in parallel; all are started before any join.
    workers = [Process(target=learn, args=(q, )) for _ in range(2)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # One result is fetched from the queue per worker.
    res = [q.get() for _ in workers]

    game = Game(8)
    nnet = onnet(game)

    # Flatten the per-worker batches into one list and train on it.
    trainExamples = [example for batch in res for example in batch]
    nnet.train(trainExamples)