def arena_process(i):
    """Pit model generation i (previous) against generation i+1 (new) on an 8x8 game.

    For i == 0 the previous network is left with its random initial weights —
    no model file is loaded for it.

    Args:
        i: index of the previous model generation.

    Returns:
        The result of Arena.play_games over 8 games.
    """
    game = Game(8)

    # Newly trained network and its search tree.
    new_net = nn(game)
    new_net.load_model(filename="model_auto_" + str(i + 1))
    new_search = MCTS(game, new_net, args)

    # Previous-generation network; stays random when i == 0.
    prev_net = nn(game)
    if i != 0:
        prev_net.load_model(filename="model_auto_" + str(i))
    prev_search = MCTS(game, prev_net, args)

    def prev_move(board):
        probs = prev_search.get_action_prob(board)
        return np.random.choice(len(probs), p=probs)

    def new_move(board):
        probs = new_search.get_action_prob(board)
        return np.random.choice(len(probs), p=probs)

    arena = Arena(player1=prev_move, player2=new_move, game=game, display=display)
    return arena.play_games(8)
def arena_process(self, r, old_model_file, new_model_file, verbose=False):
    """Play r arena games between an old model and a new model.

    Args:
        r: number of games to play.
        old_model_file: weights file for the old model; a name of length <= 1
            is treated as a sentinel meaning "use a randomly initialised net".
        new_model_file: weights file for the new model (always loaded).
        verbose: forwarded to Arena.play_games.

    Returns:
        Whatever Arena.play_games returns for this match-up.
    """
    old_net = nn(self.game)
    if len(old_model_file) > 1:
        old_net.load_model(filename=old_model_file)
    else:
        # Too-short file name means: keep the untrained random baseline.
        print('random state')
    old_mcts = MCTS(self.game, old_net, self.args)

    new_net = nn(self.game)
    new_net.load_model(filename=new_model_file)
    new_mcts = MCTS(self.game, new_net, self.args)

    def play_old(board):
        probs = old_mcts.get_action_prob(board, self.args['numMCTSSims'])
        return np.random.choice(len(probs), p=probs)

    def play_new(board):
        # NOTE: the "* 1" factor is kept from the original — presumably a knob
        # for giving the new model a different simulation budget.
        probs = new_mcts.get_action_prob(board, int(self.args['numMCTSSims'] * 1))
        return np.random.choice(len(probs), p=probs)

    arena = Arena(player1=play_old, player2=play_new,
                  game=self.game, display=display)
    return arena.play_games(r, verbose=verbose)
def mcts_test():
    """Smoke-test MCTS on a hand-built 15x15 position and display the policy."""
    game = Game(15)
    board = Board(15)
    net = nn(game)
    search = MCTS(game, net, args)

    # Player 1 stones (same squares and order as the original setup).
    for square in [(4, 1), (3, 2), (2, 3), (5, 0)]:
        board.execute_move(square, 1)
    # Player -1 stones.
    for square in [(3, 3), (3, 4), (3, 5), (3, 6)]:
        board.execute_move(square, -1)

    cur_player = 1
    canonical = game.get_canonical_form(np.array(board.pieces), cur_player)
    pi = search.get_action_prob(canonical)
    display(canonical)
    side = len(canonical)
    # Last policy entry is the pass/extra action; the rest maps onto the board.
    display_pi(np.array(pi[:-1]).reshape((side, side)))
def main():
    """Entry point: build the game and network, optionally restore a
    checkpoint and saved examples, then start the learning loop."""
    print('Loading Inception TicTacToe')
    # Fix: `g` was never defined (the `g = Game(6)` line had been commented
    # out) but is passed to Coach below, which raised NameError at runtime.
    g = Game(6)
    nnet = nn()
    if args['load_model']:
        print('Loading checkpoint: ', args['load_folder_file'])
        nnet.load_checkpoint(args['load_folder_file'][0], args['load_folder_file'][1])
    else:
        print('Not loading a checkpoint')
    print('Loading the Coach')
    c = Coach(g, nnet, args)
    if args['load_model']:
        print('Loading \'trainExamples\' from file...')
        c.loadTrainExamples()
    print('Starting the learning process 🎉')
    c.learn()
def __init__(self, player_num):
    """Create a neural-net-backed player.

    Args:
        player_num: seat number of this player, forwarded to the Player base.
    """
    super(NNetPlayer, self).__init__(Player.NNET, player_num)
    # TODO: edit this to load the best weights
    self.nnet = nn()
    # Fix: the original used pickle.load(open(...)), which leaks the file
    # handles; `with` closes them deterministically even on error.
    with open("AllPossibleActionDict.p", "rb") as f:
        self.move_to_index = pickle.load(f)  # move -> action-index mapping
    with open("AllPossibleActionVector.p", "rb") as f:
        self.move_array = pickle.load(f)  # action-index -> move mapping
def generate_data_debug(self, model_file):
    """Run one self-play episode and print how many samples correspond to a
    second-hand win versus a first-hand win.

    Args:
        model_file: weights file to load; a name of length <= 1 means the
            network stays randomly initialised.
    """
    net = nn(self.game)
    if len(model_file) > 1:
        net.load_model(filename=model_file)
    else:
        print('random state')
    coach = Coach(self.game, net, self.args)
    episode = coach.execute_episode()
    # Pair each sample's board-sum with its outcome label.
    summary = [(np.sum(sample[0]), sample[2]) for sample in episode]
    print(sum(1 for s in summary if s == (0, -1) or s == (-1, 1)))  # second hand win
    print(sum(1 for s in summary if s == (0, 1) or s == (-1, -1)))  # first hand win
def generate_data(l):
    """Play one self-play episode and append its examples to a shared file.

    Args:
        l: multiprocessing lock guarding the shared output file.
    """
    g = Game(args.goBang_n)
    nnet = nn(g)
    c = Coach(g, nnet, args)
    train_example = c.execute_episode()
    l.acquire()
    try:
        folder = args.checkpoint
        if not os.path.exists(folder):
            os.makedirs(folder)
        # Fix: os.path.join was given a single pre-concatenated string
        # (folder + "train_examples_4"), which glued the file name onto the
        # folder name whenever `folder` lacked a trailing slash. Passing them
        # as separate arguments is correct in both cases.
        filename = os.path.join(folder, "train_examples_4")
        # "ab+" appends: each episode's pickle is stacked onto the same file.
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
    finally:
        l.release()
def parallelEpisode(argsIteration):
    """Run one self-play episode with three MCTS+NN players and save its
    training samples.

    Args:
        argsIteration: packed 3-tuple (iteration number, args dict,
            move_to_index mapping) — one argument so it works with
            pool.map-style APIs.

    Returns:
        A list of [canonicalBoard, pi, value] samples once the episode ends
        with at least one winner.
    """
    # Unpack the packed argument triple.
    iteration = argsIteration[0]
    args = argsIteration[1]
    move_to_index = argsIteration[2]
    print("EXECUTING EPISODE" + str(iteration))
    # Collected per move as [canonicalBoard, player_num, pi]; relabelled with a
    # value target at the end.
    trainExamples = []
    deck = Deck()
    players = [MCTSNNPlayer(1, args['num_simulations']),
               MCTSNNPlayer(2, args['num_simulations']),
               MCTSNNPlayer(3, args['num_simulations'])]
    board = Board(players, True)
    winners = None
    num_winners = 0
    nnet = nn()
    nnet.load_checkpoint(args['load_folder_file'][0], args['load_folder_file'][1])
    while True:
        # A fresh search from the current player's perspective each turn.
        AI = MCTSNN(board, args['num_simulations'], deck,
                    board.active_player.player_num, nnet, move_to_index)
        pi = AI.getActionProb(temp=1)  # temp=1: sample proportionally (exploration)
        action = np.random.choice(len(pi), p=pi)
        canonicalBoard = AI.canonicalBoard
        trainExamples.append([canonicalBoard,
                              board.active_player.player_num, pi])
        move = StateToFeatures.action_to_move(action, board.active_player.move_array,
                                              board.active_player, len(board.players), deck)
        board.active_player.make_move(move, board, deck, players)
        # Outright win: the mover reached the victory-point target.
        if board.active_player.calculate_vp() >= settings.POINTS_TO_WIN:
            winners = set()
            winners.add(board.active_player.player_num)
            num_winners = 1
        # Episode cut-off: after round_threshold rounds, every player tied for
        # the most victory points counts as a winner.
        if board.round_num >= args['round_threshold']:
            vps = [player.calculate_vp() for player in players]
            most = max(vps)
            winners = set()
            # NOTE(review): indentation reconstructed from a collapsed source
            # line — `num_winners += 1` is placed inside the tie check. Also,
            # if the VP branch above fired in the same turn, num_winners
            # starts from 1 here and may over-count; confirm intent.
            for i in range(len(vps)):
                if vps[i] == most:
                    winners.add(i + 1)
                    num_winners += 1
        if winners:
            # Value target: winners share +1 (1/num_winners each on ties),
            # losers get -1.
            train = [[x[0], x[2],
                      (-1) ** int(x[1] not in winners) / (1 if x[1] not in winners else num_winners)]
                     for x in trainExamples]
            saveEpisodeTrainExamples(args, iteration, train)
            return train
def generate_data(l, model_iter):
    """Play one self-play episode with model generation model_iter+1 and
    append its examples to that generation's shared file.

    Args:
        l: multiprocessing lock guarding the shared output file.
        model_iter: index of the previous model generation.
    """
    g = Game(8)
    nnet = nn(g)
    nnet.load_model(filename=("model_auto_" + str(model_iter + 1)))
    c = Coach(g, nnet, args)
    train_example = c.execute_episode()
    l.acquire()
    try:
        folder = args.checkpoint
        if not os.path.exists(folder):
            os.makedirs(folder)
        # Fix: os.path.join was given a single pre-concatenated string
        # (folder + name), which glued the file name onto the folder name
        # whenever `folder` lacked a trailing slash. Pass the two components
        # as separate arguments.
        filename = os.path.join(folder, "train_examples_auto_" + str(model_iter + 1))
        # "ab+" appends: episodes from all workers stack into one file.
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
    finally:
        l.release()
def generate_data(self, l, model_file, train_example_filename):
    """Play one self-play episode and append its examples to a shared file.

    Args:
        l: multiprocessing lock guarding the shared output file.
        model_file: weights file to load; a name of length <= 2 means the
            network stays randomly initialised.
        train_example_filename: name of the output file inside the
            checkpoint folder.
    """
    nnet = nn(self.game)
    # NOTE(review): sibling code elsewhere uses `> 1` for the same sentinel
    # check — confirm which threshold is intended.
    if len(model_file) > 2:
        nnet.load_model(filename=model_file)
    else:
        print('random nn model')
    c = Coach(self.game, nnet, self.args)
    train_example = c.execute_episode()
    l.acquire()
    try:
        folder = self.args['checkpoint']
        if not os.path.exists(folder):
            os.makedirs(folder)
        # Fix: os.path.join was given a single pre-concatenated string
        # (folder + train_example_filename), which glued the file name onto
        # the folder name whenever `folder` lacked a trailing slash.
        filename = os.path.join(folder, train_example_filename)
        # "ab+" appends: episodes from all workers stack into one file.
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
    finally:
        l.release()
def main():
    """Entry point: build the game and network, optionally restore a
    checkpoint and saved examples, then start the learning loop."""
    log.info('Loading %s...', Game.__name__)
    g = Game()
    log.info('Loading %s...', nn.__name__)
    nnet = nn(g)
    if args.load_model:
        # Fix: the format string has two %s placeholders but a single tuple
        # was passed, which makes logging report "not enough arguments for
        # format string" and drop the message. Unpack the (folder, file) pair.
        log.info('Loading checkpoint "%s/%s"...',
                 args.load_folder_file[0], args.load_folder_file[1])
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    else:
        log.warning('Not loading a checkpoint!')
    log.info('Loading the Coach...')
    c = Coach(g, nnet, args)
    if args.load_model:
        log.info("Loading 'trainExamples' from file...")
        c.loadTrainExamples()
    log.info('Starting the learning process 🎉')
    c.learn()
# NOTE(review): fragment of a larger script — `i`, `mat` and `matnew` come
# from an enclosing loop/context outside this view.
# Re-encode one board row: channel 0 = piece magnitude, channel 1 = owner
# sign (-1 marks empty cells).
for j in range(len(mat[0])):
    if mat[i][j] == 0:
        matnew[i][j][0] = 0
        matnew[i][j][1] = -1
    else:
        matnew[i][j][0] = np.abs(mat[i][j])
        matnew[i][j][1] = np.sign(mat[i][j])
g = Game(5)
# all players
rp = RandomPlayer(g).play
# nnet players
n1 = nn(g)
checkpoint = torch.load("best.pth.tar")
n1.nnet.load_state_dict(checkpoint['state_dict'])
args1 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
mcts1 = MCTS(g, n1, args1)
# Greedy play: temp=0 picks the argmax action deterministically.
n1p = lambda x: np.argmax(mcts1.getActionProb(x, temp=0))
n2 = nn(g)
checkpoint = torch.load("checkpoint_2.pth.tar")
n2.nnet.load_state_dict(checkpoint['state_dict'])
args2 = dotdict({'numMCTSSims': 50, 'cpuct': 1.0})
# NOTE(review): MCTSnn here vs MCTS above — presumably a deliberately
# different search implementation for the second player; confirm.
mcts2 = MCTSnn(g, n2, args2)
n2p = lambda x: np.argmax(mcts2.getActionProb(x, temp=0))
#n2p=lambda x: alpha.alphaZeroSearch()
def __init__(self, args):
    """Initialise with a fresh network wrapper and the action-index mapping.

    Args:
        args: configuration object/dict used by the training loop.
    """
    self.nnet = nn()
    self.args = args
    # history of examples from args.numItersForTrainExamplesHistory latest iterations
    self.trainExamplesHistory = []
    # Fix: the original used pickle.load(open(...)), which leaks the file
    # handle; `with` closes it deterministically even on error.
    with open("AllPossibleActionDict.p", "rb") as f:
        self.move_to_index = pickle.load(f)
if __name__ == "__main__":
    #freeze_support()
    # Start processes with lower priority to prevent system overload/hangs/freezes.
    # Also set multiprocessing start method to spawn for Linux, since forking
    # makes trouble.
    p = psutil.Process(os.getpid())
    if sys.platform.startswith('win32'):
        p.nice(psutil.BELOW_NORMAL_PRIORITY_CLASS)
    elif sys.platform.startswith('linux'):
        p.nice(5)
        # NOTE(review): indentation reconstructed from a collapsed source line —
        # per the comment above, spawn is set on the Linux branch only.
        mp.set_start_method('spawn')
    # Set number of threads for OpenMP in PyTorch
    os.environ["OMP_NUM_THREADS"] = "1"
    g = Game(is_basic=True)
    # Suppress logging from fireplace
    logger = logging.getLogger("fireplace")
    logger.setLevel(logging.WARNING)
    nnet = nn()
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    c = Coach(g, nnet, args)
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
# NOTE(review): fragment of a larger script — `filename`, `player_list` and
# `index` are defined above this view, and the final `fields=[...]` list is
# cut off mid-literal (it continues beyond this fragment).
with open(filename) as f:
    data = json.load(f)
current_nn = None
# Build one player object per config entry; `index` is the seat number.
for player in data["players"]:
    if player["type"] == "human":
        player_list.append(Human(index))
    elif player["type"] == "random":
        player_list.append(RandomPlayer(index))
    elif player["type"] == "MCTS":
        MCTS_player = MCTSPlayer(index, int(player["num_simulations"]))
        player_list.append(MCTS_player)
    elif player["type"] == "MCTSNN":
        player_list.append(MCTSNNPlayer(index, int(player["num_simulations"])))
        modelFile = os.path.join("trainExamplesMCTS/", "temp.pth.tar")
        current_nn = nn()
        if os.path.isfile(modelFile):
            print("using saved weights!")
            current_nn.nnet.model.load_weights(modelFile)
    elif player["type"] == "NN":
        player_list.append(NNetPlayer(index, int(player["num_simulations"])))
        modelFile = os.path.join("trainExamplesMCTS/", "temp.pth.tar")
        current_nn = nn()
        if os.path.isfile(modelFile):
            current_nn.nnet.model.load_weights(modelFile)
    index += 1
record_file = int(data["record_data"])
if record_file:
    # CSV header for the game-record file; the list literal is truncated here
    # in this fragment.
    fields=['Winner Type','Winner Num','First Player Type', 'First Player VP', 'Second Player Type', 'Second Player VP',
# NOTE(review): fragment begins inside a config-dict literal whose opening
# (e.g. `args = dotdict({`) lies outside this view. The `##` markers below are
# reconstructed as stand-alone separator comments; if they originally
# commented out the following entries, confirm before relying on them.
'tempThreshold': 15,
'updateThreshold': 0.6,
'maxlenOfQueue': 200000,
'numMCTSSims': 25,
'arenaCompare': 40,
# 'cpuct': 1,
'checkpoint': './log',
'load_model': False,
'load_folder_file': ('./temp', 'best.pth.tar'),
'load_folder_examples': ('./temp', 'checkpoint_1.pth.tar'),
##
'save_log_dir': None,
'numItersForTrainExamplesHistory': 20,
'start_iter': 0,
##
'prevEloScore': [0, 0],
##
'print': True,
})

if __name__ == "__main__":
    g = Game(6)
    nnet = nn(g, args)
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    c = Coach(g, nnet, args)
    if args.load_model:
        print("Load trainExamples from file")
        c.loadTrainExamples()
    c.learn()
# NOTE(review): fragment begins inside a config-dict literal whose opening
# lies outside this view.
'numEps': 10,
'tempThreshold': 100,
'updateThreshold': 0.6,
'maxlenOfQueue': 2000000,
'numMCTSSims': 1000,
'arenaCompare': 64,
'cpuct': 3,
'checkpoint': './temp/',
'load_model': False,
'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'),
'numItersForTrainExamplesHistory': 20,
}

# NOTE(review): "x" can never equal __name__, so this whole block is dead
# code — apparently a deliberately disabled entry point kept for reference.
if __name__ == "x":
    g = Game(15)
    nnet = nn(g)
    if args.load_model:
        nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1])
    c = Coach(g, nnet, args)
    if args.load_model:
        print("Load trainExamples from file")
        c.load_train_examples()
    c.learn()

if __name__ == '__main__':
    g = Game(15)
    auto_run = AutoRun(g, args)
def __init__(self):
    """Initialise this object with a freshly constructed network wrapper."""
    self.nnet = nn()