def main():
    """Build the game, network and coach, then train inside a wandb run.

    Reads the module-level ``args`` for configuration. When
    ``args.load_model`` is set, the checkpoint named by
    ``args.load_folder_file`` (a ``(folder, filename)`` pair) is loaded into
    the network and the coach's stored training examples are restored before
    learning starts.
    """
    log.info('Loading %s...', Game.__name__)
    g = Game(8)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        # The message has two placeholders, so folder and file must be passed
        # as two separate logging arguments; passing the pair as one tuple
        # makes the %-formatting fail inside the logging machinery.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    run = wandb.init(project=args.wandb_project, config=args, reinit=True)
    log.info('Starting the learning process 🎉')
    try:
        c.learn()
    finally:
        # Close the wandb run even when training raises, so the run is not
        # left open; the exception still propagates to the caller.
        run.finish()
def learn(q):
    """Play one self-play episode and put the gathered examples on ``q``.

    Creates ``Game(8)`` and its ``onnet`` wrapper, runs a single iteration
    containing a single episode with a fresh ``MCTS``, collects whatever
    ``executeEpisode`` returns into a deque capped at 200000 entries, and
    puts that deque on the queue ``q``.
    """
    board_game = Game(8)
    network = onnet(board_game)

    first_iteration, stop_iteration = 1, 2
    episodes_per_iteration = 1

    for iteration in range(first_iteration, stop_iteration):
        print('------ITER ' + str(iteration) + '------')
        examples = deque([], maxlen=200000)

        for _ in range(episodes_per_iteration):
            # A new search tree is built for every episode.
            search = MCTS(board_game, network)
            examples.extend(executeEpisode(board_game, search))

        print('1')
        q.put(examples)
def main():
    """Build the game, network and coach, then start the learning loop.

    Reads the module-level ``args`` for configuration. When
    ``args.load_model`` is set, the checkpoint named by
    ``args.load_folder_file`` (a ``(folder, filename)`` pair) is loaded into
    the network and the coach's stored training examples are restored before
    learning starts.
    """
    log.info('Loading %s...', Game.__name__)
    g = Game(6)

    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)

    if args.load_model:
        folder = args.load_folder_file[0]
        filename = args.load_folder_file[1]
        # The message has two placeholders, so folder and file must be passed
        # as two separate logging arguments; passing the pair as one tuple
        # makes the %-formatting fail inside the logging machinery.
        log.info('Loading checkpoint "%s/%s"...', folder, filename)
        nnet.load_checkpoint(folder, filename)
    else:
        log.warning('Not loading a checkpoint!')

    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)

    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()

    log.info('Starting the learning process 🎉')
    c.learn()
def collect_data(q):
    """Run self-play episodes and put the collected examples on ``q``.

    Builds ``Game(8)`` and its network wrapper, plays ``int(numEps / 2)``
    episodes with a fresh ``MCTS`` each, accumulates whatever
    ``executeEpisode`` returns into a deque bounded by ``maxlenOfQueue``,
    and finally puts that deque on the queue ``q``. Progress is printed and
    shown on a ``Bar`` whose maximum is the full ``numEps``.
    """
    settings = dotdict({
        'numIters': 11,
        'numEps': 50,
        'tempThreshold': 15,
        'updateThreshold': 0.5,
        'maxlenOfQueue': 200000,
        'numMCTSSims': 100,
        'arenaCompare': 2,
        'cpuct': 1,
        'checkpoint': './temp/',
        'load_model': True,
        'load_folder_file': ('./temp/', 'best.pth.tar'),
        'numItersForTrainExamplesHistory': 20,
    })

    board_game = Game(8)
    network = nn(board_game)
    collected = deque([], maxlen=settings.maxlenOfQueue)

    episode_timer = AverageMeter()
    progress = Bar('Self Play', max=settings.numEps)
    suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'

    # Only half of numEps is played here while the bar maximum stays at
    # numEps; presumably another worker plays the other half - TODO confirm.
    episode_count = int(settings.numEps / 2)

    last_tick = time.time()
    for episode in range(episode_count):
        print(episode)

        # A new MCTS per episode resets the search tree.
        search = MCTS(board_game, network, settings)
        collected.extend(executeEpisode(search, board_game))

        # Bookkeeping: track per-episode wall time and refresh the bar.
        episode_timer.update(time.time() - last_tick)
        last_tick = time.time()
        progress.suffix = suffix_template.format(
            eps=episode + 1,
            maxeps=settings.numEps,
            et=episode_timer.avg,
            total=progress.elapsed_td,
            eta=progress.eta_td,
        )
        progress.next()

    q.put(collected)
    progress.finish()
50, # Number of games to play during arena play to determine if new net will be accepted. 'cpuct': 1, 'checkpoint': checkpoint, 'load_model': True, 'load_folder_file': (checkpoint, 'checkpoint.pth.tar'), 'numItersForTrainExamplesHistory': 20, }) nimConfig = {'maxPileSize': 10, 'maxNumPile': 3, 'initialState': None} if __name__ == "__main__": print("Serial Flag: " + str(serialFlag)) if serialFlag: if gameChoice == 0: g = Game(6) elif gameChoice == 1: g = TicTacToeGame() elif gameChoice == 2: g = nimGame(nimConfig) nnet = nn(g) if args.load_model: nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1]) c = Coach(g, nnet, args) if args.load_model: print("Load trainExamples from file")
next_state, next_player = game.getNextState(state, 1, action) next_state = game.getCanonicalForm(next_state, -1) node = str(next_state.reshape(-1) + 1)[1:-1] cannonical_states[vertex][action]['winner'] = game.getGameEnded(next_state, 1) # * np.abs(next_state.sum()) cannonical_states[vertex][action]['next_node'] = node if node not in seen: seen[node] = 1 # seen.add(node) queue.append(node) else: for action, data in cannonical_states[vertex].items(): node = data['next_node'] if node not in seen: seen[node] = 1 # seen.add(node) queue.append(node) N = len(cannonical_states) if N%10000 == 0: print('\rstates: {}'.format(N), end='') cannonical_states.sync() print() cannonical_states.close() return n = 6 game = Game(n) board = game.getInitBoard() bfs_cannonical(game, board, './data/6by6', first_player=1)
return trainExamplesHistory if __name__ == '__main__': args = dotdict({ 'numIters': 11, 'numEps': 50, 'tempThreshold': 15, 'updateThreshold': 0.5, 'maxlenOfQueue': 200000, 'numMCTSSims': 100, 'arenaCompare': 2, 'cpuct': 1, 'checkpoint': './temp/', 'load_model': True, 'load_folder_file': ('./temp/', 'best.pth.tar'), 'numItersForTrainExamplesHistory': 20, }) g = Game(8) nnet = nn(g) if args.load_model: nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1]) trainExamplesHistory = [] if args.load_model: print("Load trainExamples from file") trainExamplesHistory = loadTrainExamples(args) learn(args, nnet, g, trainExamplesHistory)
# Training configuration consumed by the network wrapper and the Coach.
args = dotdict({
    'numIters': 1000,
    'numEps': 100,
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 40,
    'cpuct': 1,

    'checkpoint': './temp/',
    'load_model': False,
    # (folder, filename) of the checkpoint used when 'load_model' is true.
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,
})


if __name__ == "__main__":
    # Script entry point: build game and network, optionally restore a
    # checkpoint and stored training examples, then start learning.
    g = Game()
    nnet = nn(g)

    if args.load_model:
        # The two-element pair unpacks into (folder, filename).
        nnet.load_checkpoint(*args.load_folder_file)

    c = Coach(g, nnet, args)

    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()

    c.learn()
from Coach import Coach
from othello.OthelloGame import OthelloGame as Game
from othello.pytorch.NNet import NNetWrapper as nn
from utils import *

# Training configuration consumed by the network wrapper and the Coach.
args = dotdict({
    'numIters': 1000,
    'numEps': 100,
    'tempThreshold': 15,
    'updateThreshold': 0.6,
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,
    'arenaCompare': 40,
    'cpuct': 1,

    'checkpoint': './temp/',
    'load_model': False,
    # (folder, filename) of the checkpoint used when 'load_model' is true.
    'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
})


if __name__ == "__main__":
    # Game environment, constructed with 6.
    g = Game(6)
    # Network wrapper built for that game.
    nnet = nn(g)

    if args.load_model:
        # The two-element pair unpacks into (folder, filename).
        nnet.load_checkpoint(*args.load_folder_file)

    # The coach receives the game, the network and the training parameters.
    c = Coach(g, nnet, args)
    # Start training.
    c.learn()
'numIters': 1, #original value: 1000 'numEps': 2, #orginal value: 100 'tempThreshold': 15, 'updateThreshold': 0.6, 'maxlenOfQueue': 200000, 'numMCTSSims': 25, 'arenaCompare': 40, 'cpuct': 1, 'checkpoint': './temp/', 'load_model': False, 'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'), 'numItersForTrainExamplesHistory': 20, }) if __name__ == "__main__": g = Game(6) #returns the game object (constructor) nnet = nn(g) #NNet class returns NNetWrapper for the game object (g) print( '----------------------********************-----------------------*********************-----------------' ) if args.load_model: nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1]) print('main.py==> ', 'args: ', args) c = Coach( g, nnet, args ) #returns the Coach object with params(game_object, NeuralNet, argument values) if args.load_model: print("Load trainExamples from file") c.loadTrainExamples()
iterationTrainExamples = deque([], maxlen=200000) for eps in range(1): mcts = MCTS(game, nnet) iterationTrainExamples += executeEpisode(game, mcts) print('1') q.put(iterationTrainExamples) if __name__ == '__main__': q = Manager().Queue() p1 = Process(target=learn, args=(q, )) p2 = Process(target=learn, args=(q, )) p1.start() p2.start() p1.join() p2.join() res1 = q.get() res2 = q.get() res = [] res.append(res1) res.append(res2) trainExamples = [] game = Game(8) nnet = onnet(game) for e in res: trainExamples.extend(e) nnet.train(trainExamples) #print(res1) pass