def _load_or_init_checkpoint(net, checkpoint_path):
    """Load weights from ``checkpoint_path`` into ``net``, creating the file if absent.

    When the file exists, its ``'state_dict'`` entry is loaded into ``net``.
    Otherwise the net's current weights are written to ``checkpoint_path`` so
    that the checkpoint exists afterwards.
    """
    if os.path.isfile(checkpoint_path):
        # map_location="cpu" keeps a checkpoint written from CUDA tensors
        # loadable on a host without CUDA; load_state_dict then copies the
        # values into the net's existing parameters.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        net.load_state_dict(checkpoint['state_dict'])
        logger.info("Loaded %s model.", checkpoint_path)
    else:
        # Make sure the directory exists so the very first save cannot fail.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
        torch.save({'state_dict': net.state_dict()}, checkpoint_path)
        logger.info("Initialized model.")


def run_MCTS(args, start_idx=0, iteration=0):
    """Run MCTS self-play with the network checkpoint of ``iteration``.

    The checkpoint ``./model_data/<neural_net_name>_iter<iteration>.pth.tar``
    is loaded if it exists, otherwise it is created from a freshly constructed
    ``ConnectNet``. Self-play is then run either in this process
    (``args.MCTS_num_processes == 1``) or in several spawned worker processes
    (``args.MCTS_num_processes > 1``, capped at ``mp.cpu_count()``).

    Args:
        args: Settings object. ``neural_net_name``, ``MCTS_num_processes`` and
            ``num_games_per_MCTS_process`` are read here; the object is also
            forwarded to ``MCTS_self_play``.
        start_idx: Forwarded unchanged to ``MCTS_self_play``.
        iteration: Integer used in the checkpoint file name and forwarded to
            ``MCTS_self_play``.

    Returns:
        None.
    """
    net_to_play = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration)
    current_net_filename = os.path.join("./model_data/", net_to_play)

    net = ConnectNet()
    if torch.cuda.is_available():
        net.cuda()

    if args.MCTS_num_processes > 1:
        logger.info("Preparing model for multi-process MCTS...")
        mp.set_start_method("spawn", force=True)
        # Share the parameters before loading: load_state_dict copies into the
        # existing (now shared) tensors.
        net.share_memory()
        net.eval()
        _load_or_init_checkpoint(net, current_net_filename)

        num_processes = args.MCTS_num_processes
        if num_processes > mp.cpu_count():
            num_processes = mp.cpu_count()
            logger.info(
                "Required number of processes exceed number of CPUs! "
                "Setting MCTS_num_processes to %d",
                num_processes,
            )

        logger.info("Spawning %d processes...", num_processes)
        processes = []
        with torch.no_grad():
            for i in range(num_processes):
                # The loop index is passed as the fourth argument so each
                # worker receives a distinct value.
                p = mp.Process(
                    target=MCTS_self_play,
                    args=(net, args.num_games_per_MCTS_process, start_idx, i,
                          args, iteration),
                )
                p.start()
                processes.append(p)
            for p in processes:
                p.join()
        logger.info("Finished multi-process MCTS!")

    elif args.MCTS_num_processes == 1:
        logger.info("Preparing model for MCTS...")
        net.eval()
        _load_or_init_checkpoint(net, current_net_filename)

        with torch.no_grad():
            MCTS_self_play(net, args.num_games_per_MCTS_process, start_idx, 0,
                           args, iteration)
        logger.info("Finished MCTS!")

    else:
        # Previously this case fell through without any trace.
        logger.warning(
            "MCTS_num_processes=%s is less than 1; no self-play was run.",
            args.MCTS_num_processes,
        )
def train_chessnet(args, iteration, new_optim_state):
    """Train a ``ConnectNet`` on the pickled data stored for ``iteration``.

    Every file in ``./datasets/iter_<iteration>/`` is unpickled and its
    contents are appended to one list, which is then converted to a NumPy
    array. A new ``ConnectNet`` (moved to CUDA when available), an Adam
    optimizer and a ``MultiStepLR`` scheduler are created, ``load_state``
    supplies the starting epoch, and everything is handed to ``train``.

    Args:
        args: Settings object; ``args.lr`` is read here and the object is
            forwarded to ``load_state`` and ``train``.
        iteration: Integer selecting the dataset directory; also forwarded to
            ``load_state`` and ``train``.
        new_optim_state: Forwarded unchanged to ``load_state``.

    Returns:
        None.

    NOTE: the data files are read with ``pickle.load``, which can execute
    arbitrary code; only point this at files you produced yourself.
    """
    # --- gather data -------------------------------------------------------
    logger.info("Loading training data...")
    data_dir = "./datasets/iter_%d/" % iteration
    samples = []
    for entry in os.listdir(data_dir):
        with open(os.path.join(data_dir, entry), 'rb') as handle:
            samples.extend(pickle.load(handle, encoding='bytes'))
    samples = np.array(samples)
    logger.info("Loaded data from %s." % data_dir)

    # --- train net ---------------------------------------------------------
    model = ConnectNet()
    if torch.cuda.is_available():
        model.cuda()

    adam = optim.Adam(model.parameters(), lr=args.lr, betas=(0.8, 0.999))
    lr_schedule = optim.lr_scheduler.MultiStepLR(
        adam, milestones=[100, 150, 300, 400], gamma=0.77)
    first_epoch = load_state(model, adam, lr_schedule, args, iteration,
                             new_optim_state)
    # The literal 0 is passed positionally to train; presumably a CPU/process
    # index -- confirm against train's signature.
    train(model, samples, adam, lr_schedule, first_epoch, 0, args, iteration)
def train_connectnet(args, iteration, new_optim_state):
    """Load the pickled data for ``iteration`` and construct a ``ConnectNet``.

    Every file in ``./datasets/iter_<iteration>/`` is unpickled and its
    contents are appended to one list, which is converted to a NumPy array.
    A ``ConnectNet`` is then created and moved to CUDA when available.

    Args:
        args: Not read by the code visible here.
        iteration: Integer selecting the dataset directory.
        new_optim_state: Not read by the code visible here.

    NOTE(review): the body visible here ends right after device placement --
    no optimizer is built and nothing is trained or returned. Confirm whether
    the remainder of this function is missing.
    NOTE: the data files are read with ``pickle.load``, which can execute
    arbitrary code; only point this at files you produced yourself.
    """
    # --- gather data -------------------------------------------------------
    logger.info("Loading training data...")
    data_dir = "./datasets/iter_%d/" % iteration
    samples = []
    for entry in os.listdir(data_dir):
        with open(os.path.join(data_dir, entry), 'rb') as handle:
            samples.extend(pickle.load(handle, encoding='bytes'))
    samples = np.array(samples)
    logger.info("Loaded data from %s." % data_dir)

    # --- train net ---------------------------------------------------------
    net = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        net.cuda()
def evaluate_nets(args, iteration_1, iteration_2):
    """Name the two checkpoints to compare and construct a net for each.

    The "current" net name is built from ``iteration_2`` and the "best" net
    name from ``iteration_1``; both names are logged. Two ``ConnectNet``
    instances are created and moved to CUDA when available.

    Args:
        args: Settings object; ``args.neural_net_name`` is read here.
        iteration_1: Integer used in the best net's file name.
        iteration_2: Integer used in the current net's file name.

    NOTE(review): the body visible here ends after device placement -- no
    weights are loaded and nothing is returned. A fuller ``evaluate_nets``
    with the same signature appears later in this file and would replace this
    definition at import time; confirm which one is intended.
    """
    logger.info("Loading nets...")
    name_template = "%s_iter%d.pth.tar"
    current_net = name_template % (args.neural_net_name, iteration_2)
    best_net = name_template % (args.neural_net_name, iteration_1)

    model_dir = "./model_data/"
    current_net_filename = os.path.join(model_dir, current_net)
    best_net_filename = os.path.join(model_dir, best_net)

    logger.info("Current net: %s" % current_net)
    logger.info("Previous (Best) net: %s" % best_net)

    current_cnet = ConnectNet()
    best_cnet = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        for model in (current_cnet, best_cnet):
            model.cuda()
def play_ia(game, options, model_dir=None,
            net_name="c4_current_net_trained_iter8.pth.tar"):
    """Let the stored network pick and play one move for the position in ``game``.

    ``game`` is converted with ``convert_matrix_to_alpha_zero``, the checkpoint
    is loaded into a fresh ``ConnectNet``, a search is run from the resulting
    board, one column is sampled from the returned policy and played, and the
    board before and after the move is handed to ``find_which_move_ai_made``.

    Args:
        game: Position passed to ``convert_matrix_to_alpha_zero``.
        options: Not used by this function.
        model_dir: Directory holding the checkpoint. Defaults to the original
            hard-coded location, so existing callers behave as before.
        net_name: Checkpoint file name inside ``model_dir``.

    Returns:
        Whatever ``find_which_move_ai_made`` returns for the board before and
        after the AI's move.
    """
    if model_dir is None:
        # Historical default: only valid on the original author's machine.
        # Pass model_dir explicitly anywhere else.
        model_dir = ("C:\\Users\\Francesco\\Documents\\2_Schule\\HYU\\"
                     "HYU-software-engineering-AI\\src_backend\\src\\"
                     "ai_modules\\AlphaZero_Connect4\\src\\model_data\\")

    _, converted_matrix = convert_matrix_to_alpha_zero(game)
    print(converted_matrix)

    best_cnet = ConnectNet()
    if torch.cuda.is_available():
        best_cnet.cuda()
    best_cnet.eval()
    # map_location="cpu" keeps a checkpoint written from CUDA tensors loadable
    # on a host without CUDA; load_state_dict copies into the net's parameters.
    checkpoint = torch.load(os.path.join(model_dir, net_name),
                            map_location="cpu")
    best_cnet.load_state_dict(checkpoint['state_dict'])

    current_board = cboard()
    current_board.current_board = np.array(converted_matrix)

    # 777 and 0.1 are passed straight to UCT_search / get_policy; presumably
    # the simulation count and the sampling temperature -- TODO confirm.
    num_reads = 777
    t = 0.1

    print("AI is thinking.............")
    # Inference only: no gradients are needed, as in the other MCTS entry
    # points, which also run the search under no_grad.
    with torch.no_grad():
        root = UCT_search(current_board, num_reads, best_cnet, t)
    policy = get_policy(root, t)

    # Sample one of the seven columns according to the policy and play it.
    column = np.random.choice(np.arange(7), p=policy)
    current_board = do_decode_n_move_pieces(current_board, column)
    print(current_board.current_board)
    print(" ")
    return find_which_move_ai_made(converted_matrix,
                                   current_board.current_board)
def _load_weights_for_eval(net, checkpoint_path):
    """Switch ``net`` to eval mode and load the ``state_dict`` stored at ``checkpoint_path``."""
    net.eval()
    # map_location="cpu" keeps a checkpoint written from CUDA tensors loadable
    # on a host without CUDA; load_state_dict copies into the net's parameters.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    net.load_state_dict(checkpoint['state_dict'])


def evaluate_nets(args, iteration_1, iteration_2):
    """Play the ``iteration_2`` net against the ``iteration_1`` net and pick one.

    Both checkpoints are loaded from ``./model_data/``. With
    ``args.MCTS_num_processes > 1`` the games are played in spawned worker
    processes (capped at ``mp.cpu_count()``) and the per-process results are
    averaged; with exactly one process the games are played in this process.
    Results are read back with ``load_pickle("wins_cpu_<i>")``.

    Args:
        args: Settings object; ``neural_net_name``, ``MCTS_num_processes`` and
            ``num_evaluator_games`` are read here.
        iteration_1: Iteration number of the previous (best) net.
        iteration_2: Iteration number of the current net.

    Returns:
        ``iteration_2`` when the averaged ``'best_win_ratio'`` is at least
        0.55, otherwise ``iteration_1``. ``None`` when
        ``args.MCTS_num_processes`` is below 1 (nothing is evaluated).

    NOTE(review): the stored key is called ``'best_win_ratio'`` although a
    high value promotes ``iteration_2`` (the current net) -- confirm what the
    arena actually records under that key.
    """
    win_threshold = 0.55

    logger.info("Loading nets...")
    current_net = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration_2)
    best_net = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration_1)
    current_net_filename = os.path.join("./model_data/", current_net)
    best_net_filename = os.path.join("./model_data/", best_net)

    logger.info("Current net: %s", current_net)
    logger.info("Previous (Best) net: %s", best_net)

    current_cnet = ConnectNet()
    best_cnet = ConnectNet()
    if torch.cuda.is_available():
        current_cnet.cuda()
        best_cnet.cuda()

    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs("evaluator_data", exist_ok=True)

    if args.MCTS_num_processes > 1:
        mp.set_start_method("spawn", force=True)
        # Share the parameters before loading: load_state_dict copies into the
        # existing (now shared) tensors.
        current_cnet.share_memory()
        best_cnet.share_memory()
        _load_weights_for_eval(current_cnet, current_net_filename)
        _load_weights_for_eval(best_cnet, best_net_filename)

        num_processes = args.MCTS_num_processes
        if num_processes > mp.cpu_count():
            num_processes = mp.cpu_count()
            logger.info(
                "Required number of processes exceed number of CPUs! "
                "Setting MCTS_num_processes to %d",
                num_processes,
            )

        logger.info("Spawning %d processes...", num_processes)
        processes = []
        with torch.no_grad():
            for i in range(num_processes):
                p = mp.Process(
                    target=fork_process,
                    args=(arena(current_cnet, best_cnet),
                          args.num_evaluator_games, i),
                )
                p.start()
                processes.append(p)
            for p in processes:
                p.join()

        # Average the ratio reported by each worker.
        wins_ratio = sum(
            (load_pickle("wins_cpu_%i" % i)['best_win_ratio']
             for i in range(num_processes)),
            0.0,
        ) / num_processes

    elif args.MCTS_num_processes == 1:
        _load_weights_for_eval(current_cnet, current_net_filename)
        _load_weights_for_eval(best_cnet, best_net_filename)

        arena1 = arena(current_cnet=current_cnet, best_cnet=best_cnet)
        arena1.evaluate(num_games=args.num_evaluator_games, cpu=0)

        # Read the result by key, exactly as the multi-process branch does;
        # the former attribute access (stats.best_win_ratio) fails on a dict.
        wins_ratio = load_pickle("wins_cpu_%i" % 0)['best_win_ratio']

    else:
        # Previously this fell off the end of the function without a trace.
        logger.warning(
            "MCTS_num_processes=%s is less than 1; no evaluation was run.",
            args.MCTS_num_processes,
        )
        return None

    return iteration_2 if wins_ratio >= win_threshold else iteration_1
dataset.append('Human as white wins') print("YOU WIN!!!!!!!!!!!") else: dataset.append('AI as white wins') print("YOU LOSE!!!!!!!") return "white", dataset else: dataset.append("Nobody wins") print("DRAW!!!!!") return None, dataset if __name__ == "__main__": best_net="c4_current_net_trained1_iter6.pth.tar" best_net_filename = os.path.join("./model_data/",\ best_net) best_cnet = ConnectNet() if cuda := torch.cuda.is_available(): best_cnet.cuda() best_cnet.eval() checkpoint = torch.load(best_net_filename) best_cnet.load_state_dict(checkpoint['state_dict']) play_again = True while play_again: play_game(best_cnet) while True: again = input("Do you wanna play again? (Y/N)\n") if again.lower() in ["y", "n"]: if again.lower() == "n": play_again = False break
arena_obj.evaluate(num_games, cpu) if __name__ == "__main__": multiprocessing = 0 current_net = "c4_current_net_trained2_iter0.pth.tar" best_net = "current_net_trained1_iter2.pth.tar" current_net_filename = os.path.join("./model_data/",\ current_net) best_net_filename = os.path.join("./model_data/",\ best_net) current_cnet = ConnectNet() best_cnet = ConnectNet() cuda = torch.cuda.is_available() if cuda: current_cnet.cuda() best_cnet.cuda() if multiprocessing == 1: mp.set_start_method("spawn", force=True) current_cnet.share_memory() best_cnet.share_memory() current_cnet.eval() best_cnet.eval() checkpoint = torch.load(current_net_filename) current_cnet.load_state_dict(checkpoint['state_dict']) checkpoint = torch.load(best_net_filename) best_cnet.load_state_dict(checkpoint['state_dict'])
def run_MCTS(args, start_idx=0, iteration=0):
    """Build the checkpoint file name for ``iteration`` and construct a ``ConnectNet``.

    Args:
        args: Settings object; ``args.neural_net_name`` is read here.
        start_idx: Not read by the code visible here.
        iteration: Integer used in the checkpoint file name.

    NOTE(review): the body visible here ends right after device placement --
    no checkpoint is loaded and no self-play is started. A fuller ``run_MCTS``
    with the same signature appears earlier in this file, and this later
    definition would replace it at import time; confirm which one is intended.
    """
    name_parts = (args.neural_net_name, iteration)
    net_to_play = "%s_iter%d.pth.tar" % name_parts

    net = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        net.cuda()
del dataset save_as_pickle( "dataset_cpu%i_%i_%s" % (cpu, idxx, datetime.datetime.today().strftime("%Y-%m-%d")), dataset_p) if __name__ == "__main__": multiprocessing = 1 if multiprocessing == 1: net_to_play = "c4_current_net_trained2_iter5.pth.tar" mp.set_start_method("spawn", force=True) net = ConnectNet() cuda = torch.cuda.is_available() if cuda: net.cuda() net.share_memory() net.eval() print("hi") # torch.save({'state_dict': net.state_dict()}, os.path.join("./model_data/",\ # "c4_current_net.pth.tar")) current_net_filename = os.path.join("./model_data/",\ net_to_play) checkpoint = torch.load(current_net_filename) net.load_state_dict(checkpoint['state_dict']) processes = [] for i in range(6): p = mp.Process(target=MCTS_self_play, args=(net, 25, i)) p.start() processes.append(p)