def arena_process(i):
    """Pit the model saved as ``model_auto_<i>`` against ``model_auto_<i + 1>``.

    Player 1 is driven by the previous model and player 2 by the newer one.
    When ``i`` is 0 no previous checkpoint is loaded, so player 1 uses the
    network exactly as ``nn(g)`` constructs it.

    Both players sample their move from the probabilities returned by
    ``MCTS.get_action_prob`` rather than picking the maximum.

    Args:
        i: Index of the previous model; the newer model is ``i + 1``.

    Returns:
        Whatever ``Arena.play_games(8)`` returns (callers in this file index
        the result with ``[0]`` and ``[1]``).
    """
    game = Game(8)

    # Newer model: always restored from disk.
    new_net = nn(game)
    new_net.load_model(filename=f"model_auto_{i + 1}")
    new_search = MCTS(game, new_net, args)

    # Previous model: only restored when one exists (i > 0).
    prev_net = nn(game)
    if i != 0:
        prev_net.load_model(filename=f"model_auto_{i}")
    prev_search = MCTS(game, prev_net, args)

    def sample_action(search, board):
        """Draw one action index according to the search's probabilities."""
        probs = search.get_action_prob(board)
        return np.random.choice(len(probs), p=probs)

    arena = Arena(
        player1=lambda board: sample_action(prev_search, board),
        player2=lambda board: sample_action(new_search, board),
        game=game,
        display=display,
    )
    return arena.play_games(8)
def mcts_test():
    """Manually inspect the MCTS policy on a hand-built 15x15 position.

    Four moves are executed for player ``1`` and four for player ``-1``, then
    the search is run from player 1's canonical view. The board and the
    resulting policy (minus its last entry) are shown via ``display`` and
    ``display_pi``. Nothing is returned.
    """
    game = Game(15)
    board = Board(15)
    net = nn(game)
    search = MCTS(game, net, args)

    # Same moves, same order as the original hand-written sequence.
    for move in ((4, 1), (3, 2), (2, 3), (5, 0)):
        board.execute_move(move, 1)
    for move in ((3, 3), (3, 4), (3, 5), (3, 6)):
        board.execute_move(move, -1)

    current_player = 1
    canonical = game.get_canonical_form(np.array(board.pieces), current_player)
    policy = search.get_action_prob(canonical)

    display(canonical)
    # The final policy entry is dropped so the rest reshapes to a square grid.
    grid_shape = (len(canonical), len(canonical))
    display_pi(np.array(policy[:-1]).reshape(grid_shape))
def generate_data(l):
    """Run one self-play episode and append its examples to the data file.

    A fresh network is built for a ``Game(args.goBang_n)`` board, one episode
    is played through ``Coach.execute_episode`` and the result is pickled
    onto the end of ``<args.checkpoint>/train_examples_4``. Every call adds
    one more pickle record to that file.

    NOTE(review): a two-parameter ``generate_data(l, model_iter)`` also
    appears in this source; if both live in the same module the later
    definition shadows this one.

    Args:
        l: Lock shared by the worker processes. It is held only while the
            file is written, so self-play itself runs in parallel.
    """
    game = Game(args.goBang_n)
    nnet = nn(game)
    coach = Coach(game, nnet, args)
    train_example = coach.execute_episode()

    # `with` releases the lock on every exit path, like the old try/finally.
    with l:
        folder = args.checkpoint
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components. The previous
        # `folder + name` concatenation only worked when the checkpoint path
        # ended with a separator; otherwise the file landed next to the
        # folder instead of inside it.
        filename = os.path.join(folder, "train_examples_4")
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
def generate_data(l, model_iter):
    """Run one self-play episode with a saved model and append its examples.

    The network for an 8x8 game is restored from
    ``model_auto_<model_iter + 1>``, one episode is played through
    ``Coach.execute_episode`` and the result is pickled onto the end of
    ``<args.checkpoint>/train_examples_auto_<model_iter + 1>``. Every call
    adds one more pickle record to that file.

    NOTE(review): a one-parameter ``generate_data(l)`` also appears in this
    source; if both live in the same module this definition replaces it.

    Args:
        l: Lock shared by the worker processes. It is held only while the
            file is written, so self-play itself runs in parallel.
        model_iter: Zero-based iteration index; model and data files use
            ``model_iter + 1`` as their suffix.
    """
    suffix = str(model_iter + 1)

    game = Game(8)
    nnet = nn(game)
    nnet.load_model(filename="model_auto_" + suffix)
    coach = Coach(game, nnet, args)
    train_example = coach.execute_episode()

    # `with` releases the lock on every exit path, like the old try/finally.
    with l:
        folder = args.checkpoint
        os.makedirs(folder, exist_ok=True)
        # Join folder and file name as separate components. The previous
        # `folder + name` concatenation only worked when the checkpoint path
        # ended with a separator; otherwise the file landed next to the
        # folder instead of inside it.
        filename = os.path.join(folder, "train_examples_auto_" + suffix)
        with open(filename, "ab+") as f:
            pickle.dump(train_example, f)
# Tail of the configuration mapping read elsewhere as `args`, followed by two
# script entry blocks. The mapping's opening lies before this point and is not
# visible here; the entries are consumed by Coach / MCTS / AutoRun, whose
# interpretation of each key is defined outside this file section.
# NOTE(review): the code below reads `args.load_model` by attribute, so the
# mapping is presumably wrapped in an attribute-access dict - confirm.
#
# Block guarded by `__name__ == "x"`: builds a 15x15 game and network,
# optionally restores a checkpoint from `args.load_folder_file`, creates a
# Coach, optionally reloads its training examples, then calls `c.learn()`.
# It does not run when the file is executed as a script (there `__name__` is
# "__main__") - presumably a deliberately disabled legacy entry point.
#
# Block guarded by `__name__ == '__main__'`: builds a 15x15 game and an
# AutoRun instance from it and `args`. Any further statements of this block
# are not visible here.
'numIters': 10, 'numEps': 10, 'tempThreshold': 100, 'updateThreshold': 0.6, 'maxlenOfQueue': 2000000, 'numMCTSSims': 1000, 'arenaCompare': 64, 'cpuct': 3, 'checkpoint': './temp/', 'load_model': False, 'load_folder_file': ('/dev/models/8x100x50', 'best.pth.tar'), 'numItersForTrainExamplesHistory': 20, } if __name__ == "x": g = Game(15) nnet = nn(g) if args.load_model: nnet.load_checkpoint(args.load_folder_file[0], args.load_folder_file[1]) c = Coach(g, nnet, args) if args.load_model: print("Load trainExamples from file") c.load_train_examples() c.learn() if __name__ == '__main__': g = Game(15) auto_run = AutoRun(g, args)
# Data-generation / training loop, followed by an arena evaluation block.
#
# Loop (up to 200 rounds): starts 8 Processes running
# `generate_data(lock, i)`, joins them all, gathers the examples with
# `merge_data(i)`, prints how many there are, trains a fresh 8x8 network on
# them and saves it as "model_auto_<i + 2>".
# NOTE(review): the loop variable is `iteration`, but the body reads `i`,
# which is not bound anywhere in the visible text - confirm which is intended.
# NOTE(review): the `else: break` has no visible matching `if`; the condition
# that decides between running a round and stopping is missing from this
# fragment, so the statement nesting cannot be determined from here.
#
# Block guarded by `__name__ == 'x'`: maps `arena_process` over range(8) in a
# Pool of 8 workers and sums element [0] and element [1] of every result into
# `win_1` and `win_2`. It does not run when the file is executed as a script
# (there `__name__` is "__main__") - presumably deliberately disabled.
lock = Lock() for iteration in range(200): jobs = [] for _ in range(8): p = Process(target=generate_data, args=(lock, i)) jobs.append(p) p.start() for job in jobs: job.join() trainExamples = merge_data(i) print(len(trainExamples)) g = Game(8) nnet = nn(g) nnet.train(trainExamples) nnet.save_model(filename="model_auto_" + str(i + 2)) else: break print(i, 'one model') if __name__ == 'x': with Pool(8) as p: result = p.map(arena_process, range(8)) win_1 = sum([i[0] for i in result]) win_2 = sum([i[1] for i in result])