def check_result_and_save_network(pwins, nwins, draws, game, args, iter_num):
    """Accept or reject the newly trained network based on arena results.

    The new model is rejected when games were decided (pwins + nwins > 0)
    and its score (wins plus half the draws) falls below
    ``args.updateThreshold``; otherwise 'train.pth.tar' is promoted to
    'best.pth.tar' and archived under a per-iteration checkpoint name.
    """
    # set gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    decided = pwins + nwins
    new_score = float(nwins + (0.5 * draws))
    if decided > 0 and new_score / (decided + draws) < args.updateThreshold:
        print('REJECTING NEW MODEL')
        return

    print('ACCEPTING NEW MODEL')
    net = nn(game)
    net.load_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
    # Promote the freshly trained weights and keep a per-iteration copy.
    for target in ('best.pth.tar', 'checkpoint_' + str(iter_num) + '.pth.tar'):
        net.save_checkpoint(folder=args.checkpoint, filename=target)
def async_against(game, args, iter_num):
    """Pit the newly trained network against the current best in the arena.

    Loads 'train.pth.tar' (new) and 'best.pth.tar' (previous) from
    ``args.checkpoint``, plays 2 games via :class:`Arena`, and returns the
    tuple ``(pwins, nwins, draws)`` — wins for the previous model, wins for
    the new model, and draws.
    """
    import tensorflow as tf

    # Pin this worker to a GPU: alternate between devices 0 and 1 when
    # multi-GPU is enabled, otherwise use the configured device.
    if args.multiGPU:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0" if iter_num % 2 == 0 else "1"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # Let TF grow GPU memory on demand instead of reserving it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    _ = tf.Session(config=config)

    # Create the two competing networks; fall back to fresh weights when a
    # checkpoint is missing (e.g. on the very first iteration). Narrowed
    # from bare `except:` so SystemExit/KeyboardInterrupt still propagate.
    nnet = nn(game)
    pnet = nn(game)
    try:
        nnet.load_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
    except Exception:
        print("load train model fail")
    try:
        pnet.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except Exception:
        print("load old model fail")

    pmcts = MCTS(game, pnet, args)
    nmcts = MCTS(game, nnet, args)
    # Both players act greedily (temp=0) over their MCTS policies.
    arena = Arena(lambda x: np.argmax(pmcts.get_action_prob(x, temp=0)),
                  lambda x: np.argmax(nmcts.get_action_prob(x, temp=0)),
                  game)
    arena.displayBar = False
    pwins, nwins, draws = arena.playGames(2)
    return pwins, nwins, draws
def async_self_play(game, args, iter_num, iterr):
    """Run one self-play episode and return its training examples.

    Plays a full game with MCTS guided by the current best network and
    returns a list of ``(canonical_board, pi, z)`` tuples, where ``z`` is the
    final result signed from the perspective of the player to move in that
    position. ``iterr`` is accepted for signature compatibility but unused.
    """
    import tensorflow as tf

    # Pin this worker to a GPU: alternate between devices 0 and 1 when
    # multi-GPU is enabled, otherwise use the configured device.
    if args.multiGPU:
        os.environ["CUDA_VISIBLE_DEVICES"] = "0" if iter_num % 2 == 0 else "1"
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # Let TF grow GPU memory on demand instead of reserving it all up front.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    _ = tf.Session(config=config)

    net = nn(game)
    mcts = MCTS(game, net, args)
    try:
        net.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except Exception:
        # Narrowed from bare `except:`; no best model yet -> play with the
        # randomly initialized network.
        pass

    train_examples = []
    board = game.getInitBoard()
    cur_player = 1
    episode_step = 0
    while True:
        episode_step += 1
        canonical_board = game.getCanonicalForm(board, cur_player)
        # Sample proportionally early on, play greedily (temp=0) after
        # tempThreshold moves.
        temp = int(episode_step < args.tempThreshold)
        pi = mcts.get_action_prob(canonical_board, temp=temp)
        # Record every symmetric variant of the position; the outcome slot
        # stays None until the game ends.
        for b, p in game.getSymmetries(canonical_board, pi):
            train_examples.append([b, cur_player, p, None])
        action = np.random.choice(len(pi), p=pi)
        board, cur_player = game.getNextState(board, cur_player, action)
        r = game.getGameEnded(board, cur_player)
        if r != 0:
            # Back-fill the result, flipping its sign for positions where
            # the opponent of the final player was to move.
            return [(x[0], x[2], r * ((-1) ** (x[1] != cur_player)))
                    for x in train_examples]
def async_train_network(game, args, trainhistory):
    """Train the network on the accumulated self-play example history.

    Warm-starts from 'best.pth.tar' when available, merges the on-disk
    example history into *trainhistory*, trims the history to
    ``args.numItersForTrainExamplesHistory`` entries, persists it back to
    disk, trains, and saves the result as 'train.pth.tar'.
    """
    # set gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = args.setGPU

    # Create the network for training, warm-started from the current best.
    # Narrowed from bare `except:` so SystemExit/KeyboardInterrupt propagate.
    nnet = nn(game)
    try:
        nnet.load_checkpoint(folder=args.checkpoint, filename='best.pth.tar')
    except Exception:
        pass  # best-effort warm start; train from scratch otherwise

    # ---load history file---
    model_file = os.path.join(args.checkpoint, "trainhistory.pth.tar")
    examples_file = model_file + ".examples"
    if not os.path.isfile(examples_file):
        print(examples_file)  # no saved history yet; show the missing path
    else:
        print("File with trainExamples found. Read it.")
        with open(examples_file, "rb") as f:
            for i in Unpickler(f).load():
                trainhistory.append(i)

    # ---delete if over limit---
    # NOTE(review): this drops the LAST entry although the message says
    # "oldest" — correct only if the list is ordered newest-first (caller's
    # fresh examples prepended before the loaded history). Confirm against
    # the caller before changing the index.
    if len(trainhistory) > args.numItersForTrainExamplesHistory:
        print("len(trainExamplesHistory) =", len(trainhistory),
              " => remove the oldest trainExamples")
        del trainhistory[len(trainhistory) - 1]

    # ---extend history--- flatten the per-iteration example lists
    train_examples = []
    for e in trainhistory:
        train_examples.extend(e)

    # ---save history---
    folder = args.checkpoint
    if not os.path.exists(folder):
        os.makedirs(folder)
    filename = os.path.join(folder, 'trainhistory.pth.tar' + ".examples")
    with open(filename, "wb+") as f:
        Pickler(f).dump(trainhistory)

    print('Train with {} examples'.format(len(train_examples)))
    nnet.train(train_examples)
    nnet.save_checkpoint(folder=args.checkpoint, filename='train.pth.tar')
    # --- hyper-parameter literal; its opening `...({` line lies above this
    #     chunk and is not visible here ---
    'numIters': 1000,
    'numEps': 100,
    'tempThreshold': 15,        # moves before self-play switches to greedy (temp=0)
    'updateThreshold': 0.6,     # arena score ratio required to accept a new model
    'maxlenOfQueue': 200000,
    'numMCTSSims': 25,          # presumably MCTS rollouts per move — used elsewhere, confirm
    'arenaCompare': 40,
    'cpuct': 1,
    'checkpoint': './temp/',    # folder all checkpoints/examples are read from and written to
    'load_model': True,
    'load_folder_file': ('./temp/', 'best.pth.tar'),
    'numItersForTrainExamplesHistory': 20,  # max entries kept in the example history
})

g = Game(6)  # presumably a 6x6 board — confirm Game's constructor
nnet = nn(g)
if args.load_model:
    nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
c = Coach(g, nnet, args)

# Play out one game: a fresh MCTS each move produces a policy vector for
# the board as seen from player 1's canonical view (both sides use the
# same perspective here), and the argmax action is applied as player 1.
board = g.getInitBoard()
player = 1
while g.getGameEnded(board, player) == 0:
    mcts = MCTS(g, nnet, args)
    action = mcts.get_action_prob(g.getCanonicalForm(board, 1))
    print(action)
    board, player = g.getNextState(board, 1, np.argmax(action))
g.display(board)