def eval_test():
    zeroNN1 = ZeroNN(verbose=False,
                     path=mkdir(join(FOLDER_ZERO_NNS, 'NNs')),
                     ckpt_idx=-1)
    zeroNN2 = ZeroNN(verbose=False,
                     path=mkdir(join(FOLDER_ZERO_NNS, 'NNs')),
                     ckpt_idx=-1)
    mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=100)
    mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=100)
    winrate1, winrate2, tie_rate, ai_hists = \
        eval_mcts(5, 5, 4, mcts1, mcts2, True, 1, True)

def optimization(self):
    with self.lock_model_paths:
        zeroNN = ZeroNN(verbose=2,
                        path=self.folder_NNs,
                        ckpt_idx=-1,
                        num_samples=self.train_size,
                        epoch=3,
                        batch_size=self.batch_size,
                        save_epochs=4,
                        logger=self.logger)
        self.unchecked_model_paths = zeroNN.trained_model_paths
    self.logger.log('optimization start!')
    while self.self_play_cnt > 0:
        while self.train_data is None or \
                not self.data_avail or \
                self.train_ratio * len(self.train_data[0]) * 0.8 < self.batch_size:
            time.sleep(10)
        # Wait for the models to be evaluated.
        # Better models need to be selected to generate better data.
        # Remove old models to ease the burden on the evaluator.
        while len(self.unchecked_model_paths) > 10 and np.random.rand() < 0.99:
            time.sleep(20)
            with self.lock_model_paths:
                self.unchecked_model_paths.remove(
                    self.unchecked_model_paths[round(np.random.rand() * 8)])
        # Give time slices to the other two threads.
        with self.lock_train_data:
            train_data = [self.train_data[0].copy(),
                          self.train_data[1].copy(),
                          self.train_data[2].copy()]
            # Save the training data in case we need it to continue training.
            for i in range(3):
                np.save(self.data_path[i], self.train_data[i])
            print(len(self.train_data[0]), 'saved')
        # Select a subset of playing histories to train on to control overfitting.
        nonrep_rand_nums = non_repeated_random_nums(
            len(train_data[0]), round(self.train_ratio * len(train_data[0])))
        zeroNN.fit(train_data[0][nonrep_rand_nums],
                   train_data[1][nonrep_rand_nums],
                   train_data[2][nonrep_rand_nums],
                   0.1)
        self.data_avail = False
        zeroNN.epoch = zeroNN.verbose = zeroNN.save_epochs = 10
        self.model_avail = True
        while not self.data_avail:
            time.sleep(30)

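# A minimal sketch of the sampling helper used above, assuming
# non_repeated_random_nums(n, k) returns k distinct indices in [0, n).
# The real implementation lives elsewhere in the repo; this only illustrates
# the contract that optimization() relies on for subsampling train_data.
def _non_repeated_random_nums_sketch(n, k):
    import numpy as np
    # np.random.choice with replace=False draws k distinct indices.
    return np.random.choice(n, k, replace=False)
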
def eval_debug():
    # zeroNN1 = ZeroNN(verbose=False, path=join(FOLDER_ZERO_NNS, 'NNs'))
    zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + '885', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '885',
                                   'NNs/model.ckpt-1176'))
    # zeroNN2 = ZeroNN(verbose=False, path=join(FOLDER_ZERO_NNS, 'NNs'))
    zeroNN2 = ZeroNN(path=join(FOLDER_ZERO_NNS + '885', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '885',
                                   'NNs/model.ckpt-1857'))
    mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=64)
    mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=64)
    winrate1, winrate2, tie_rate, ai_hists = \
        eval_mcts(8, 8, 5, mcts1, mcts2, True, 2, True)
    print(winrate1, winrate2, tie_rate)


# The alternative eval_debug below is disabled by the surrounding triple quotes.
'''
def eval_debug():
    # zeroNN1 = ZeroNN(verbose=False, path=join(FOLDER_ZERO_NNS, 'NNs'))
    zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + '115', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '115',
                                   'NNs/model.ckpt-145981'))
    # zeroNN2 = ZeroNN(verbose=False, path=join(FOLDER_ZERO_NNS, 'NNs'))
    # zeroNN1 = None
    zeroNN2 = ZeroNN(path=join(FOLDER_ZERO_NNS + '115', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '115',
                                   'NNs/model.ckpt-105633'))
    zeroNN2 = None
    mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=512)
    mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=1024)
    winrate1, winrate2, tie_rate, ai_hists = \
        eval_mcts(11, 11, 5, mcts1, mcts2, sim_times=5, verbose=True)
    print(winrate1, winrate2, tie_rate)
'''

def op_eval_test():
    # zeroNN1 = ZeroNN(verbose=False, path=join(FOLDER_ZERO_NNS, 'NNs'))
    zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + '115', 'NNs'),
                     ckpt_idx=join(FOLDER_ZERO_NNS + '115',
                                   'NNs/model.ckpt-153223'))
    zeroNN2 = zeroNN1
    mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=512, op_weight=None)
    mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=512, op_weight=1)
    winrate1, winrate2, tie_rate, ai_hists = \
        eval_mcts(11, 11, 5, mcts1, mcts2, sim_times=5, verbose=True)
    print(winrate1, winrate2, tie_rate)

def main_debug():
    MctsPuct.CHECK_DETAILS = True
    collect_ai_hists = False
    gb = '115'
    if True:
        path = join(mkdir(join(FOLDER_ZERO_NNS + gb, 'replays')),
                    curr_time_str() + '_AI_test')
        game = Game(int(gb[0]) + 10, int(gb[1]) + 10, 5,
                    Game.Player.AI, Game.Player.human,
                    collect_ai_hists=collect_ai_hists)
        zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + gb, 'NNs'),
                         ckpt_idx=join(FOLDER_ZERO_NNS + gb,
                                       'NNs/model.ckpt-316206'))
        # model.ckpt-16533
        # zeroNN1 = None
        noise = 0
        if True:
            game.players[0].mcts.zeroNN = zeroNN1
            game.players[0].mcts.max_acts = 2048
            # game.players[0].mcts.val_mult = 2
            # game.players[0].mcts.noise = noise
            # game.players[0].mcts.do_policy_prune = False
            # game.players[0].mcts.hand_val = 0.5
            # game.players[0].c = 15
            # game.players[0].mcts.red_child = True  # 10.8 9.5
            # game.players[0].mcts.further_check = False
        if False:
            game.players[1].mcts.zeroNN = zeroNN1
            game.players[1].mcts.max_acts = 1024
            game.players[1].mcts.noise = noise
        game.start(graphics=True)
        pk.dump(game.acts, open(pkfn(path), 'wb'))
        input("over")
    if collect_ai_hists:
        debug = mkdir("debug")
        probs, eval_board, winner = game.ai_hists()
        sp0, sp1, sp2 = ZeroNNTrainer.hists2enhanced_train_data(
            [[probs, eval_board, winner]])
        np.save(join(debug, npfn('sp0')), sp0)
        np.save(join(debug, npfn('sp1')), sp1)
        np.save(join(debug, npfn('sp2')), sp2)
        exit()
        # Unreachable after exit(); kept from the original for reference.
        print(len(probs), len(eval_board), winner)
        print(probs[0].shape, eval_board[0].shape, winner)
        game = Game(11, 11, 5, use_hists=np.array(eval_board))
        game.start()

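# A hypothetical counterpart to the pk.dump(...) call above: reload a saved
# replay. Assumes pkfn() maps a path stub to the pickle filename, as used in
# main_debug(); illustration only, not called by the trainer.
def _load_replay_sketch(path):
    import pickle as pk
    with open(pkfn(path), 'rb') as f:
        return pk.load(f)  # the list of moves recorded in game.acts
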
# Note: this second main_debug definition shadows the one above.
def main_debug():
    game = Game(8, 8, 5, Game.Player.AI, Game.Player.AI,
                collect_ai_hists=False)
    zeroNN1 = ZeroNN(path=join(FOLDER_ZERO_NNS + '885', 'NNs'),
                     ckpt_idx=-1)
    # ckpt_idx=join(FOLDER_ZERO_NNS + '885', 'NNs/model.ckpt-10300')
    zeroNN2 = ZeroNN(path='new_train_zeronn/zeronn8', ckpt_idx=-1)
    zeroNN1 = zeroNN2
    game.players[0].mcts.zeroNN = zeroNN1
    game.players[0].mcts.max_acts = 512
    game.players[0].mcts.hv21 = 0
    game.players[0].mcts.hand_val = 0
    # game.players[0].mcts.red_child = True  # 10.8 9.5
    # game.players[0].mcts.further_check = False
    game.players[1].mcts.zeroNN = zeroNN1
    game.players[1].mcts.max_acts = 512
    game.players[1].mcts.hv21 = 0
    game.players[1].mcts.hand_val = 0
    # game.players[1].mcts.update_itv = 0
    game.start(graphics=True)
    print("over")
    return None
    # Unreachable after the return; kept from the original for reference.
    probs, eval_board, winner = game.ai_hists()
    for i in range(3):
        print(probs[i])
        print(eval_board[i][:, :, 0])
        print(eval_board[i][:, :, 1])
        print(eval_board[i][:, :, 2])
        print(eval_board[i][:, :, 3])
        print('\n\n')
    print(np.array(probs).shape)
    print(np.array(eval_board).shape)
    print(winner)

def optimization(self, only_opt=False):
    with self.lock_model_paths:
        zeroNN = ZeroNN(verbose=10,
                        path=self.folder_NNs,
                        ckpt_idx=-1,
                        num_samples=100000,
                        trained_model_paths=self.unchecked_model_paths,
                        epoch=10,
                        batch_size=self.batch_size,
                        save_epochs=10,
                        logger=self.logger)
    self.logger.log('optimization start!')
    while self.shared_vars['self_play_cnt'] > 0 or only_opt:
        while not self.shared_vars['data_avail'] and not only_opt:
            time.sleep(10)
        # train_data = ZeroNNTrainer.manip_train_data(self.shared_constants, self.shared_vars, self.lock_train_data,
        #     best_player_path=self.shared_vars['best_player_path'])
        train_data, check_data = self.get_train_data()
        if train_data[0].shape[0] < self.batch_size * 4 and not only_opt:
            time.sleep(10)
            continue
        # Wait for the models to be evaluated.
        # Better models need to be selected to generate better data.
        # Remove old models to ease the burden on the evaluator.
        while len(self.unchecked_model_paths) > self.n_eval_processes + 3:
            with self.lock_model_paths:
                self.unchecked_model_paths.remove(self.unchecked_model_paths[0])
        # Select some playing histories to train on to control overfitting:
        # nonrep_rand_nums = non_repeated_random_nums(len(train_data[0]), round(self.train_ratio * len(train_data[0])))
        if zeroNN.predict_avail():
            eval = zeroNN.run_eval(check_data[0], check_data[1], check_data[2])
            self.logger.log(
                'Opt check: \n',
                'sp items: [loss_policy, loss_value, loss_total, acc_value, acc_policy]:',
                '\n eval: ', eval)
            # Evaluate how the zeroNN works on the latest played games.
            # This is real test data since it has not been fed to zeroNN for
            # training yet, so we save the evaluations.
            self.loss_hists.append([self.shared_vars['curr_generation']] + eval)
            np.save(self.path_loss_hists, np.array(self.loss_hists))
        print('optimization fit', train_data[0].shape[0], '...')
        zeroNN.fit(train_data[0], train_data[1], train_data[2], 0.1)
        self.shared_vars['model_avail'] = True
        self.shared_vars['data_avail'] = False
    print('optimization over')

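# A minimal sketch for inspecting the saved loss history, assuming the column
# layout logged above: [generation, loss_policy, loss_value, loss_total,
# acc_value, acc_policy]. Not part of the trainer; illustration only.
def _plot_loss_hists_sketch(path_loss_hists):
    import numpy as np
    import matplotlib.pyplot as plt
    hists = np.load(path_loss_hists)
    labels = ['loss_policy', 'loss_value', 'loss_total',
              'acc_value', 'acc_policy']
    for col, label in enumerate(labels, start=1):
        plt.plot(hists[:, 0], hists[:, col], label=label)
    plt.xlabel('generation')
    plt.legend()
    plt.show()
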
def test_664():
    winner = 0
    while True:
        MctsPuct.CHECK_DETAILS = True
        game = Game(5, 5, 4, Game.Player.AI, Game.Player.AI,
                    collect_ai_hists=False)
        zeroNN1 = ZeroNN(
            path=r'F:\Software\vspro\NInRow\NInRow\test554\NNs',
            ckpt_idx=r'F:\Software\vspro\NInRow\NInRow\test554\NNs\model.ckpt-257210')
        # 216633
        # ckpt_idx=r'F:\Software\vspro\NInRow\NInRow\test554\NNs\model.ckpt-177628')
        """
        [[0.01 0.01 0.   0.   0.  ]
         [0.   0.01 0.01 0.02 0.01]
         [0.01 0.06 0.17 0.24 0.03]
         [0.   0.02 0.03 0.15 0.06]
         [0.   0.02 0.06 0.07 0.03]]
        """
        noise = 0
        game.players[0].mcts.zeroNN = zeroNN1
        game.players[1].mcts.zeroNN = zeroNN1
        game.players[0].mcts.max_acts = 256
        game.players[1].mcts.max_acts = 256
        game.players[0].mcts.noise = noise
        game.players[1].mcts.noise = noise
        game.start(graphics=True)
        winner += (game.winner == 0) * 2 - 1
        input(str(winner))

def self_play(self):
    while not self.model_avail and self.nozero_mcts is None:
        time.sleep(5)
    time.sleep(round(np.random.rand() * 60 * self.n_play_threads + 1))
    self.logger.log('self_play start!')
    plays = self.plays
    while self.self_play_cnt > 0:
        zeroNN1 = ZeroNN(verbose=False, path=self.folder_NNs,
                         ckpt_idx=self.best_player_path)
        zeroNN2 = ZeroNN(verbose=False, path=self.folder_NNs,
                         ckpt_idx=self.best_player_path)
        best_player_path = self.best_player_path
        # We do not lock for self_play_cnt.
        while self.self_play_cnt > 0:
            self.self_play_cnt -= plays
            # Decay resign_val: rookies should always play the game to the end,
            # while masters are allowed to resign at an earlier stage.
            self.resign_val = max(
                0.75, self.resign_val - self.resign_val * 0.00002 * plays)
            self.logger.log('self_play:', 'self_play_cnt=', self.self_play_cnt,
                            ' self.resign_val=', self.resign_val)
            # Create two identical players to 'self play'.
            if self.nozero_mcts is not None:
                mcts1 = Mcts(0, 0, zeroNN=None,
                             max_acts_=self.nozero_mcts_sims, const_temp=1,
                             noise=0.2, resign_val=0.99, temp2zero_moves=3,
                             hand_val=self.hand_val)
                mcts2 = Mcts(0, 0, zeroNN=None,
                             max_acts_=self.nozero_mcts_sims, const_temp=1,
                             noise=0.2, resign_val=0.99, temp2zero_moves=3,
                             hand_val=self.hand_val)
            else:
                mcts1 = Mcts(0, 0, zeroNN=zeroNN1, max_acts_=self.mcts_sims,
                             const_temp=1, temp2zero_moves=3, noise=0.2,
                             resign_val=self.resign_val, hand_val=self.hand_val)
                mcts2 = Mcts(0, 0, zeroNN=zeroNN2, max_acts_=self.mcts_sims,
                             const_temp=1, temp2zero_moves=3, noise=0.2,
                             resign_val=self.resign_val, hand_val=self.hand_val)
            t = time.time()
            winrate1, winrate2, tie_rate, ai_hists = \
                eval_mcts(self.board_rows, self.board_cols, self.n_in_row,
                          mcts1, mcts2, False, plays // 2, True)
            ai_hists = self.hists2enhanced_train_data(ai_hists)
            # Evaluate how the zeroNN works on the latest played games.
            # This is real test data since it has not been fed to zeroNN for
            # training yet, so we save the evaluations.
            if self.nozero_mcts is None:
                eval = zeroNN1.run_eval(ai_hists[0], ai_hists[1], ai_hists[2])
                self.logger.log(
                    'self-play in ' + str(time.time() - t) + 's\n',
                    'sp items: [loss_policy, loss_value, loss_total, acc_value]:',
                    '\n eval: ', eval)
                self.loss_hists.append([self.curr_generation] + eval)
            # Append the latest data to the old.
            with self.lock_train_data:
                if self.train_data is None or \
                        len(self.train_data) == self.batch_size:
                    self.train_data = ai_hists
                else:
                    self.train_data = [
                        np.vstack([self.train_data[0],
                                   ai_hists[0]]).astype(np.bool),
                        np.vstack([self.train_data[1], ai_hists[1]]),
                        np.vstack([self.train_data[2], ai_hists[2]])]
                # Save the training data in case we need it to continue training.
                for i in range(3):
                    np.save(self.data_path[i], self.train_data[i])
                # Discard some old data since our memory is running out.
                if len(self.train_data[0]) > self.train_size + 1:
                    for i in range(3):
                        self.train_data[i] = self.train_data[i][
                            -round(self.train_size * 0.6 + 1):]
            self.logger.log('self_play:', winrate1, winrate2, tie_rate,
                            ' new data size=', ai_hists[0].shape,
                            ' total data:', self.train_data[0].shape)
            self.data_avail = True
            with self.lock_model_best:
                find_new_best = (self.best_player_path != best_player_path)
            if find_new_best and len(self.train_data[0]) > 10000:
                # Discard some old data: a new best player has been trained, so
                # we want to train new models on games played by it.
                with self.lock_train_data:
                    len_train_data0 = len(self.train_data[0])
                    for i in range(3):
                        self.train_data[i] = self.train_data[i][
                            -round(len_train_data0 * 0.6 + 1):]
                break

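# A worked mini-example of the resign_val decay used in self_play(): starting
# from 0.99, each batch of `plays` games shrinks resign_val by a factor of
# (1 - 0.00002 * plays) until the 0.75 floor is reached. Illustration only,
# with hypothetical default arguments.
def _resign_val_decay_sketch(plays=10, batches=5000):
    resign_val = 0.99
    for _ in range(batches):
        resign_val = max(0.75, resign_val - resign_val * 0.00002 * plays)
    return resign_val  # approaches the 0.75 floor as batches grow
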
def evaluator(self):
    while not self.model_avail:
        time.sleep(5)
    self.lock_model_paths.acquire()
    while len(self.unchecked_model_paths) != 0:
        self.unchecked_model_paths.pop()
    self.lock_model_paths.release()
    # Try to test checkpoints as different as possible.
    time.sleep(round(np.random.rand() * 30 * self.n_eval_threads + 1))
    self.logger.log('evaluator start!')
    while self.self_play_cnt > 0 or len(self.unchecked_model_paths) > 5:
        # Use 'with' to lock so we cannot forget to release it.
        with self.lock_model_paths:
            # Wait for the latest trained model.
            if len(self.unchecked_model_paths) < 2:
                time.sleep(30)
                continue
            path_to_check = self.unchecked_model_paths.pop()
            if len(self.unchecked_model_paths) > 5:
                if np.random.rand() < 0.5:
                    self.unchecked_model_paths.pop()
                else:
                    path_to_check = self.unchecked_model_paths.pop()
        self.logger.log('evaluator:', self.best_player_path, 'VS',
                        path_to_check, '...')
        if self.nozero_mcts is None:
            best_mcts = Mcts(0, 0,
                             zeroNN=ZeroNN(verbose=False,
                                           path=self.folder_NNs,
                                           ckpt_idx=self.best_player_path),
                             max_acts_=self.mcts_sims, const_temp=0,
                             noise=0.1, resign_val=self.resign_val,
                             hand_val=self.hand_val)
        else:
            # When self.nozero_mcts is not None, the first generation of zeroNN
            # has not been trained yet. We double the number of simulations
            # since MCTS without zeroNN searches faster, which also means any
            # trained model must be able to defeat MCTS without zeroNN at
            # double the simulations.
            best_mcts = Mcts(0, 0, zeroNN=None,
                             max_acts_=self.nozero_mcts_sims, const_temp=0.2,
                             noise=0.1, hand_val=self.hand_val)
        zeroNN_to_check = ZeroNN(verbose=False, path=self.folder_NNs,
                                 ckpt_idx=path_to_check)
        mcts2 = Mcts(0, 0, zeroNN=zeroNN_to_check, max_acts_=self.mcts_sims,
                     const_temp=0, noise=0.1, resign_val=self.resign_val,
                     hand_val=self.hand_val)
        # The evaluation must be fast in order to select the best model, so we
        # play only a few games, but require the candidate to have an
        # overwhelming advantage over the existing best player. If the best
        # player is defeated, the candidate takes first place.
        winrate1, winrate2, tie_rate, _ = \
            eval_mcts(self.board_rows, self.board_cols, self.n_in_row,
                      best_mcts, mcts2, False, [3, 1], False)
        self.logger.log('evaluator:', self.best_player_path, 'VS',
                        path_to_check, '--', winrate1, '-', winrate2, '-',
                        tie_rate)
        time.sleep(5)
        # The candidate must win at least two of the four games while the best
        # player wins none; if so, replace the best player with it.
        if winrate2 > 0.4 and winrate1 < 0.01:
            self.curr_generation += 1
            self.logger.log('evaluator:', path_to_check, 'defeat',
                            self.best_player_path, 'by', winrate2 - winrate1)
            self.logger.log(path_to_check, 'becomes generation',
                            self.curr_generation)
            with self.lock_model_best:
                self.best_player_path = path_to_check
                self.nozero_mcts = None

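# The promotion rule above, isolated as a tiny predicate for clarity: over the
# [3, 1] evaluation games, the candidate (winrate2) must take at least two of
# the four while the incumbent (winrate1) takes none. A sketch, not called by
# the trainer.
def _challenger_promoted_sketch(winrate1, winrate2):
    return winrate2 > 0.4 and winrate1 < 0.01
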
def console(self):
    """
    Use console to help training
    """
    while True:
        cmd = input()
        try:
            if cmd == 'md':
                folder = input(
                    'Input the folder name where the three data files (np files) exist. '
                    + 'The files should be named [selfplay0.npy], [selfplay1.npy] and [selfplay2.npy]:\n')
                try:
                    data_path = [join(folder, npfn('selfplay' + str(i)))
                                 for i in range(3)]
                    train_data = [np.load(p) for p in data_path]
                    with self.lock_train_data:
                        self.train_data = [
                            np.vstack([self.train_data[0],
                                       train_data[0]]).astype(np.bool),
                            np.vstack([self.train_data[1], train_data[1]]),
                            np.vstack([self.train_data[2], train_data[2]])
                        ]
                    self.data_avail = True
                    print("load new data successfully, data size = ",
                          len(self.train_data[0]))
                except:
                    print("md error: folder or files do not exist or are invalid")
            elif cmd == 'pl':
                plays = input(
                    "Input the number of games for one self-play; it should be a positive even number:\n")

                def plerr():
                    print("pl error: ", plays,
                          " is invalid as a self-play number")
                try:
                    plays = int(plays)
                    if plays > 0 and plays % 2 == 0:
                        self.plays = plays
                        print("self.plays -> ", self.plays)
                    else:
                        plerr()
                except:
                    plerr()
            elif cmd == 'bs':
                batch_size = input(
                    "Input the batch size; it should be a positive number:\n")

                def bserr():
                    print("bs error")
                try:
                    batch_size = int(batch_size)
                    if batch_size > 0:
                        self.batch_size = batch_size
                        print("Batch size reset to", batch_size)
                    else:
                        bserr()
                except:
                    bserr()
            elif cmd == 'bp':
                best_player_path = input(
                    "Input the new best player path; the current is "
                    + str(self.best_player_path) + ":\n")
                if os.path.exists(best_player_path + '.index'):
                    self.best_player_path = best_player_path
                    print("best player path -> ", self.best_player_path)
                else:
                    print("bp error,", best_player_path, 'is invalid')
            elif cmd == 'nm':
                def nmerr():
                    print("nm error")
                nozero_mcts_sims = input(
                    "Input the number of simulations for nozero_mcts's search. A non-positive number means deactivation.\n"
                    + " Currently, nozero_mcts is " + str(self.nozero_mcts)
                    + ". And nozero_mcts_sims is "
                    + str(self.nozero_mcts_sims) + ":\n")
                try:
                    nozero_mcts_sims = int(nozero_mcts_sims)
                    if nozero_mcts_sims > 0:
                        self.nozero_mcts_sims = nozero_mcts_sims
                        self.nozero_mcts = Mcts(
                            0, 0, zeroNN=None,
                            max_acts_=self.nozero_mcts_sims,
                            const_temp=0.2, noise=0.1,
                            hand_val=self.hand_val)
                        print("nozero_mcts activated. And nozero_mcts_sims is ",
                              self.nozero_mcts_sims)
                    else:
                        self.nozero_mcts = None
                        print("nozero_mcts deactivated. And nozero_mcts_sims is ",
                              self.nozero_mcts_sims)
                except:
                    nmerr()
            elif cmd == 'op':
                print("Start an optimization now")
                self.data_avail = True
            elif cmd == 'pc':
                self_play_cnt = input("Input number of play count:\n")
                try:
                    self_play_cnt = int(self_play_cnt)
                    self.self_play_cnt = max(self_play_cnt, 0)
                    print("self.self_play_cnt -> ", self.self_play_cnt)
                except:
                    print("pc error")
            elif cmd == 'te':
                # Using te will clear the screen.
                game = Game(self.board_rows, self.board_cols, self.n_in_row,
                            Game.Player.AI, Game.Player.human,
                            collect_ai_hists=False)
                zeroNN1 = ZeroNN(path=join(self.folder_NNs), ckpt_idx=-1)
                game.players[0].mcts.zeroNN = zeroNN1
                game.players[0].mcts.max_acts = self.mcts_sims
                game.start(graphics=True)
            elif cmd == 'di':
                def dierr():
                    print("di error")
                discard = input(
                    "Input the number of data items to discard; the current number is "
                    + str(len(self.train_data[0])) + ":\n")
                try:
                    discard = int(discard)
                    if discard <= 0 or discard > len(self.train_data[0]) - 1:
                        dierr()
                    else:
                        for i in range(3):
                            self.train_data[i] = self.train_data[i][discard:]
                        print("Discard successfully! The current number is ",
                              len(self.train_data[0]))
                except:
                    dierr()
            elif cmd == 'hv':
                def hverr():
                    print('hv error')
                hand_val = input("Input hand_val; the current number is "
                                 + str(self.hand_val) + ":\n")
                try:
                    self.hand_val = float(hand_val)
                except:
                    hverr()
            elif cmd == 'sv':
                for i in range(3):
                    np.save(self.data_path[i], self.train_data[i])
            elif cmd == 'pt':
                print('Folder=', self.folder)
            else:
                print("command error. (cmd=", cmd, ")")
        except:
            print("Unknown console error!")

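# Summary of the console commands handled above (threaded trainer):
#   md - merge selfplay0/1/2.npy data files from a folder into train_data
#   pl - set the number of games per self-play batch (positive even number)
#   bs - set the training batch size
#   bp - set the best player checkpoint path
#   nm - activate/deactivate the no-zeroNN MCTS and set its simulation count
#   op - trigger an optimization pass by marking data as available
#   pc - set the remaining self-play count
#   te - play a test game against the current network
#   di - discard the oldest n training samples
#   hv - set hand_val
#   sv - save the current training data to disk
#   pt - print the working folder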
def self_play(consts, vars, unchecked_model_paths, lock_train_data,
              lock_model_best, loss_hists, path_loss_hists, logger, idx):
    while not vars['model_avail'] and vars['nozero_mcts'] == False:
        time.sleep(5)
    logger.log('self_play' + str(idx) + ' start!')
    while vars['self_play_cnt'] > 0:
        zeroNN1 = ZeroNN(verbose=False, path=consts['folder_NNs'],
                         ckpt_idx=vars['best_player_path'])
        zeroNN2 = zeroNN1
        best_player_path = vars['best_player_path']
        # We do not lock for self_play_cnt.
        while vars['self_play_cnt'] > 0:
            vars['self_play_cnt'] -= vars['plays']
            # Decay resign_val: rookies should always play the game to the end,
            # while masters are allowed to resign at an earlier stage.
            vars['resign_val'] = max(
                0.75,
                vars['resign_val'] - vars['resign_val'] * 0.0001 * vars['plays'])
            # Create two identical players to 'self play'.
            if vars['nozero_mcts']:
                mcts1 = Mcts(0, 0, zeroNN=None,
                             max_acts_=vars['nozero_mcts_sims'],
                             const_temp=1, noise=0, temp2zero_moves=3)
                mcts2 = Mcts(0, 0, zeroNN=None,
                             max_acts_=vars['nozero_mcts_sims'],
                             const_temp=1, noise=0, temp2zero_moves=3)
            else:
                mcts1 = Mcts(0, 0, zeroNN=zeroNN1,
                             max_acts_=consts['mcts_sims'], const_temp=1,
                             temp2zero_moves=3, noise=vars['noise'],
                             resign_val=vars['resign_val'])
                mcts2 = Mcts(0, 0, zeroNN=zeroNN2,
                             max_acts_=consts['mcts_sims'], const_temp=1,
                             temp2zero_moves=3, noise=vars['noise'],
                             resign_val=vars['resign_val'])
            t = time.time()
            logger.log('self_play' + str(idx) + ':',
                       'self_play_cnt=', vars['self_play_cnt'],
                       ' net=',
                       'nomcts' if vars['nozero_mcts'] else vars['best_player_path'],
                       ' self.resign_val=', vars['resign_val'])
            winrate1, winrate2, tie_rate, ai_hists = \
                eval_mcts(consts['board_rows'], consts['board_cols'],
                          consts['n_in_row'], mcts1, mcts2, False,
                          vars['plays'] // 2, True)
            ai_hists = ZeroNNTrainer.hists2enhanced_train_data(ai_hists, consts)
            # Append the latest data to the old and save the training data in
            # case we need it to continue training.
            train_data = ZeroNNTrainer.manip_train_data(
                consts, vars, lock_train_data, ai_hists,
                best_player_path=best_player_path)
            logger.log('self_play' + str(idx) + ':', winrate1, winrate2,
                       tie_rate, ' new data size=', ai_hists[0].shape,
                       ' total data:', train_data[0].shape)
            vars['data_avail'] = True
            with lock_model_best:
                if vars['best_player_path'] != best_player_path:
                    break
    print("self play over")

def evaluator(consts, vars, unchecked_model_paths, lock_model_paths,
              lock_model_best, logger):
    while not vars['model_avail']:
        time.sleep(60)
    with lock_model_paths:
        while len(unchecked_model_paths) >= 2:
            unchecked_model_paths.remove(unchecked_model_paths[0])
    logger.log('evaluator start!')
    while vars['self_play_cnt'] > 0 or len(unchecked_model_paths) > 5:
        while len(unchecked_model_paths) == 0:
            time.sleep(20)
        # Use 'with' to lock so we cannot forget to release it.
        with lock_model_paths:
            # Wait for the latest trained model.
            if len(unchecked_model_paths) == 0:
                continue
            path_to_check = unchecked_model_paths.pop()
        logger.log('evaluator:', vars['best_player_path'], 'VS',
                   path_to_check, '...')
        acts = min(256, consts['mcts_sims'])
        if vars['nozero_mcts'] == False:
            # join(shared_constants['folder_NNs'], 'model.ckpt-' + str(best_player_path))
            best_mcts = Mcts(0, 0,
                             zeroNN=ZeroNN(verbose=False,
                                           path=consts['folder_NNs'],
                                           ckpt_idx=vars['best_player_path']),
                             max_acts_=acts, const_temp=0, noise=0,
                             resign_val=vars['resign_val'])
        else:
            # When nozero_mcts is set, the first generation of zeroNN has not
            # been trained yet. We double the number of simulations since MCTS
            # without zeroNN searches faster, which also means any trained
            # model must be able to defeat MCTS without zeroNN at double the
            # simulations.
            best_mcts = Mcts(0, 0, zeroNN=None, max_acts_=acts,
                             const_temp=0, noise=0)
        zeroNN_to_check = ZeroNN(verbose=False, path=consts['folder_NNs'],
                                 ckpt_idx=path_to_check)
        mcts2 = Mcts(0, 0, zeroNN=zeroNN_to_check, max_acts_=acts,
                     const_temp=0, noise=0, resign_val=vars['resign_val'])
        # The evaluation must be fast in order to select the best model, so we
        # play only a few games, but require the candidate to have an
        # overwhelming advantage over the existing best player. If the best
        # player is defeated, the candidate takes first place.
        winrate1, winrate2, tie_rate, _ = \
            eval_mcts(consts['board_rows'], consts['board_cols'],
                      consts['n_in_row'], best_mcts, mcts2, False, 8, False)
        logger.log('evaluator:', vars['best_player_path'], 'VS', path_to_check,
                   '--', winrate1, '-', winrate2, '-', tie_rate)
        # If the candidate nets at least 3 more wins than the best player over
        # the 16 evaluation games, replace the best player with it.
        if winrate2 - winrate1 >= 3 / (8 * 2) - 0.0001:
            vars['curr_generation'] += 1
            logger.log('evaluator:', path_to_check, 'defeat',
                       vars['best_player_path'], 'by', winrate2 - winrate1)
            logger.log(path_to_check, 'becomes generation',
                       vars['curr_generation'])
            with lock_model_best:
                vars['best_player_path'] = path_to_check
                vars['nozero_mcts'] = False
    print("evaluator over")

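# The multiprocess promotion threshold above in isolation: winrates are
# measured over 8 games per side (16 in total), so requiring
# winrate2 - winrate1 >= 3 / (8 * 2) - 0.0001 means the candidate must net at
# least 3 more wins than the incumbent. A sketch for clarity only.
def _mp_promotion_margin_sketch(wins_candidate, wins_incumbent, games=16):
    margin = (wins_candidate - wins_incumbent) / games
    return margin >= 3 / games - 0.0001
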
def console(self):
    """
    Use console to help training
    """
    print("console started")
    while True:
        cmd = input()
        try:
            if cmd == 'md':
                data_path = input(
                    'Input the name of the existing data file (np file). '
                    + 'The file should be named [selfplayxxx.npz]:\n')
                try:
                    # data_path = [join(folder, npfn('selfplay' + str(i))) for i in range(3)]
                    train_data = np.load(npfn(data_path))
                    best_player_path = data_path.split('y')[-1].split('.')[0]
                    train_data = ZeroNNTrainer.manip_train_data(
                        self.shared_constants, self.shared_vars,
                        self.lock_train_data, train_data,
                        best_player_path=best_player_path)
                    self.shared_vars['data_avail'] = True
                    print("load new data successfully, data size = ",
                          len(train_data[0]))
                except:
                    print("md error: folder or files do not exist or are invalid")
                del train_data
            elif cmd == 'pl':
                plays = input(
                    "Input the number of games for one self-play; it should be a positive even number:\n")

                def plerr():
                    print("pl error: ", plays,
                          " is invalid as a self-play number")
                try:
                    plays = int(plays)
                    if plays > 0 and plays % 2 == 0:
                        self.shared_vars['plays'] = plays
                        print("self.plays -> ", self.shared_vars['plays'])
                    else:
                        plerr()
                except:
                    plerr()
            elif cmd == 'bs':
                batch_size = input(
                    "Input the batch size; it should be a positive number:\n")

                def bserr():
                    print("bs error")
                try:
                    batch_size = int(batch_size)
                    if batch_size > 0:
                        self.batch_size = batch_size
                        print("Batch size reset to", batch_size)
                    else:
                        bserr()
                except:
                    bserr()
            elif cmd == 'bp':
                best_player_path = input(
                    "Input the new best player path; the current is "
                    + str(self.shared_vars['best_player_path']) + ":\n")
                if str2int(best_player_path) is not None:
                    best_player_path = join(
                        self.shared_constants['folder_NNs'],
                        'model.ckpt-' + str(best_player_path))
                if os.path.exists(best_player_path + '.index'):
                    self.shared_vars['best_player_path'] = best_player_path
                    print("best player path -> ", best_player_path)
                else:
                    print("bp error,", best_player_path, 'is invalid')
            elif cmd == 'nm':
                def nmerr():
                    print("nm error")
                nozero_mcts_sims = input(
                    "Input the number of simulations for nozero_mcts's search. A non-positive number means deactivation.\n"
                    + " Currently, nozero_mcts is "
                    + str(self.shared_vars['nozero_mcts'])
                    + ". And nozero_mcts_sims is "
                    + str(self.shared_vars['nozero_mcts_sims']) + ":\n")
                try:
                    nozero_mcts_sims = int(nozero_mcts_sims)
                    if nozero_mcts_sims > 0:
                        self.shared_vars['nozero_mcts_sims'] = nozero_mcts_sims
                        self.shared_vars['nozero_mcts'] = True
                        print("nozero_mcts activated. And nozero_mcts_sims is ",
                              nozero_mcts_sims)
                    else:
                        self.shared_vars['nozero_mcts'] = False
                        print("nozero_mcts deactivated. And nozero_mcts_sims is ",
                              nozero_mcts_sims)
                except:
                    nmerr()
            elif cmd == 'op':
                print("Start optimization now")
                self.shared_vars['data_avail'] = True
            elif cmd == 'pc':
                self_play_cnt = input("Input number of play count:\n")
                try:
                    self_play_cnt = int(self_play_cnt)
                    self.shared_vars['self_play_cnt'] = max(self_play_cnt, 0)
                    print("self.self_play_cnt -> ",
                          self.shared_vars['self_play_cnt'])
                except:
                    print("pc error")
            elif cmd == 'te':
                # Using te will clear the screen.
                game = Game(self.board_rows, self.board_cols, self.n_in_row,
                            Game.Player.AI, Game.Player.human,
                            collect_ai_hists=False)
                zeroNN1 = ZeroNN(path=join(self.folder_NNs), ckpt_idx=-1)
                game.players[0].mcts.zeroNN = zeroNN1
                game.players[0].mcts.max_acts = self.mcts_sims
                game.start(graphics=True)
            elif cmd == 'ns':
                noise = input("Current noise is "
                              + str(self.shared_vars['noise'])
                              + ", input new noise:")
                noise = to_type(noise, float)
                if noise is None:
                    print("Invalid noise value")
                else:
                    print("Set noise=", noise)
                    self.shared_vars['noise'] = noise
            elif cmd == 'tr':
                ratio = input("Current train ratio is "
                              + str(self.train_ratio)
                              + ", input new ratio:")
                ratio = to_type(ratio, float)
                if ratio is None:
                    print("Invalid ratio value")
                else:
                    print("Set ratio=", ratio)
                    self.train_ratio = ratio
            else:
                print("command error. (cmd=", cmd, ")")
        except:
            print("Unknown console error!")

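# Summary of the console commands handled above (multiprocess trainer). It
# mirrors the threaded console but stores state in shared_vars, drops the
# di/hv/sv/pt commands, and adds:
#   ns - set the self-play MCTS noise parameter
#   tr - set the train_ratio used to subsample histories during optimization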