Ejemplo n.º 1
0
def _load_or_init_net(net, net_filename, map_location=None):
    """Load weights from ``net_filename`` into ``net``, or create the file.

    If the checkpoint file exists, its ``'state_dict'`` entry is loaded into
    ``net`` in place. Otherwise the current (freshly initialised) weights of
    ``net`` are saved to ``net_filename`` so later stages can find them.

    Args:
        net: The network whose parameters are loaded or saved.
        net_filename: Path of the checkpoint file.
        map_location: Forwarded to ``torch.load``; lets a checkpoint that was
            saved on a GPU be loaded on a CPU-only host.
    """
    if os.path.isfile(net_filename):
        checkpoint = torch.load(net_filename, map_location=map_location)
        net.load_state_dict(checkpoint['state_dict'])
        logger.info("Loaded %s model.", net_filename)
    else:
        # torch.save does not create missing parent directories.
        os.makedirs(os.path.dirname(net_filename), exist_ok=True)
        torch.save({'state_dict': net.state_dict()}, net_filename)
        logger.info("Initialized model.")


def run_MCTS(args, start_idx=0, iteration=0):
    """Run MCTS self-play with the network of the given iteration.

    The checkpoint ``./model_data/<neural_net_name>_iter<iteration>.pth.tar``
    is loaded if present; otherwise the freshly constructed network is saved
    under that name. Self-play then runs either in several spawned processes
    or directly in the current process, depending on
    ``args.MCTS_num_processes``.

    Args:
        args: Configuration object. This function reads ``neural_net_name``,
            ``MCTS_num_processes`` and ``num_games_per_MCTS_process`` from it
            and forwards the whole object to ``MCTS_self_play``.
        start_idx: Forwarded unchanged to ``MCTS_self_play``.
        iteration: Training iteration; selects the checkpoint file and is
            forwarded to ``MCTS_self_play``.
    """
    net_to_play = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration)
    current_net_filename = os.path.join("./model_data/", net_to_play)

    net = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        net.cuda()
    # On a CPU-only host, remap tensors that were saved from a GPU.
    map_location = None if cuda else torch.device("cpu")

    if args.MCTS_num_processes > 1:
        logger.info("Preparing model for multi-process MCTS...")
        mp.set_start_method("spawn", force=True)
        # Share the parameters before loading: load_state_dict copies into
        # the existing tensors, so the loaded weights stay shared.
        net.share_memory()
        net.eval()
        _load_or_init_net(net, current_net_filename, map_location)

        # Never spawn more workers than there are CPUs.
        num_processes = args.MCTS_num_processes
        if num_processes > mp.cpu_count():
            num_processes = mp.cpu_count()
            logger.info("Required number of processes exceed number of CPUs! Setting MCTS_num_processes to %d" % num_processes)

        logger.info("Spawning %d processes..." % num_processes)
        processes = []
        with torch.no_grad():
            for i in range(num_processes):
                p = mp.Process(target=MCTS_self_play,
                               args=(net, args.num_games_per_MCTS_process,
                                     start_idx, i, args, iteration))
                p.start()
                processes.append(p)
            for p in processes:
                p.join()
        logger.info("Finished multi-process MCTS!")

    elif args.MCTS_num_processes == 1:
        logger.info("Preparing model for MCTS...")
        net.eval()
        _load_or_init_net(net, current_net_filename, map_location)

        with torch.no_grad():
            MCTS_self_play(net, args.num_games_per_MCTS_process, start_idx, 0, args, iteration)
        logger.info("Finished MCTS!")

    else:
        # Previously this case fell through silently and did nothing.
        logger.warning("MCTS_num_processes=%s is less than 1; no self-play was run.",
                       args.MCTS_num_processes)
Ejemplo n.º 2
0
def play_ia(game, options):
	"""Let the trained AlphaZero network choose one move for ``game``.

	The game is converted to the board representation used by the AlphaZero
	code, a UCT search is run from that position, one column is sampled from
	the resulting policy and applied to the board, and the move is reported
	by comparing the board before and after.

	Args:
		game: Game state accepted by ``convert_matrix_to_alpha_zero``.
		options: Currently unused; kept so existing callers keep working.

	Returns:
		Whatever ``find_which_move_ai_made`` returns for the board before and
		after the AI's move.
	"""
	num_simulations = 777  # number passed to UCT_search as its second argument
	temperature = 0.1      # passed to both UCT_search and get_policy

	_, convertedMatrix = convert_matrix_to_alpha_zero(game)

	print(convertedMatrix)

	#########################################################################
	# AlphaZero
	#########################################################################

	# NOTE(review): absolute, machine-specific path — this only works on the
	# original author's machine; consider making it configurable.
	best_net = "c4_current_net_trained_iter8.pth.tar"
	best_net_filename = os.path.join("C:\\Users\\Francesco\\Documents\\2_Schule\\HYU\\HYU-software-engineering-AI\\src_backend\\src\\ai_modules\\AlphaZero_Connect4\\src\\model_data\\", best_net)

	best_cnet = ConnectNet()
	cuda = torch.cuda.is_available()
	if cuda:
		best_cnet.cuda()
	best_cnet.eval()
	# On a CPU-only host, remap tensors that were saved from a GPU.
	map_location = None if cuda else torch.device("cpu")
	checkpoint = torch.load(best_net_filename, map_location=map_location)
	best_cnet.load_state_dict(checkpoint['state_dict'])

	current_board = cboard()
	current_board.current_board = np.array(convertedMatrix)

	print("AI is thinking.............")
	root = UCT_search(current_board, num_simulations, best_cnet, temperature)
	policy = get_policy(root, temperature)

	# Sample one of the seven columns according to the search policy, then
	# decode the move and drop the piece.
	column = np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]), p=policy)
	current_board = do_decode_n_move_pieces(current_board, column)

	print(current_board.current_board); print(" ")

	return find_which_move_ai_made(convertedMatrix, current_board.current_board)
	
Ejemplo n.º 3
0
def _load_eval_net(net, net_filename, map_location=None):
    """Load the ``'state_dict'`` stored in ``net_filename`` into ``net``."""
    checkpoint = torch.load(net_filename, map_location=map_location)
    net.load_state_dict(checkpoint['state_dict'])


def evaluate_nets(args, iteration_1, iteration_2):
    """Pit the net of ``iteration_2`` against the net of ``iteration_1``.

    Both checkpoints are loaded from ``./model_data/`` and played against each
    other through ``arena``. The win statistics are read back from the
    ``wins_cpu_<i>`` pickles via ``load_pickle``.

    Args:
        args: Configuration object. This function reads ``neural_net_name``,
            ``MCTS_num_processes`` and ``num_evaluator_games`` from it.
        iteration_1: Iteration of the previous (best) net.
        iteration_2: Iteration of the current (challenger) net.

    Returns:
        ``iteration_2`` if the (average) ``'best_win_ratio'`` is at least
        0.55, otherwise ``iteration_1``. ``None`` if
        ``args.MCTS_num_processes`` is less than 1 (no evaluation is run).
    """
    logger.info("Loading nets...")
    current_net = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration_2)
    best_net = "%s_iter%d.pth.tar" % (args.neural_net_name, iteration_1)
    current_net_filename = os.path.join("./model_data/", current_net)
    best_net_filename = os.path.join("./model_data/", best_net)

    logger.info("Current net: %s" % current_net)
    logger.info("Previous (Best) net: %s" % best_net)

    current_cnet = ConnectNet()
    best_cnet = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        current_cnet.cuda()
        best_cnet.cuda()
    # On a CPU-only host, remap tensors that were saved from a GPU.
    map_location = None if cuda else torch.device("cpu")

    os.makedirs("evaluator_data", exist_ok=True)

    if args.MCTS_num_processes > 1:
        mp.set_start_method("spawn", force=True)

        # Share the parameters before loading: load_state_dict copies into
        # the existing tensors, so the loaded weights stay shared.
        current_cnet.share_memory()
        best_cnet.share_memory()
        current_cnet.eval()
        best_cnet.eval()
        _load_eval_net(current_cnet, current_net_filename, map_location)
        _load_eval_net(best_cnet, best_net_filename, map_location)

        # Never spawn more workers than there are CPUs.
        num_processes = args.MCTS_num_processes
        if num_processes > mp.cpu_count():
            num_processes = mp.cpu_count()
            logger.info(
                "Required number of processes exceed number of CPUs! Setting MCTS_num_processes to %d"
                % num_processes)
        logger.info("Spawning %d processes..." % num_processes)

        processes = []
        with torch.no_grad():
            for i in range(num_processes):
                p = mp.Process(target=fork_process,
                               args=(arena(current_cnet, best_cnet),
                                     args.num_evaluator_games, i))
                p.start()
                processes.append(p)
            for p in processes:
                p.join()

        # Average the per-worker win ratios.
        wins_ratio = sum(
            load_pickle("wins_cpu_%i" % (i))['best_win_ratio']
            for i in range(num_processes)
        ) / num_processes

    elif args.MCTS_num_processes == 1:
        current_cnet.eval()
        best_cnet.eval()
        _load_eval_net(current_cnet, current_net_filename, map_location)
        _load_eval_net(best_cnet, best_net_filename, map_location)

        arena1 = arena(current_cnet=current_cnet, best_cnet=best_cnet)
        arena1.evaluate(num_games=args.num_evaluator_games, cpu=0)

        # The stats are indexed by key, exactly as in the multi-process
        # branch; attribute access (stats.best_win_ratio) fails on a dict.
        stats = load_pickle("wins_cpu_%i" % (0))
        wins_ratio = stats['best_win_ratio']

    else:
        # Previously this case fell through silently; the return value is
        # still None so existing callers see the same result.
        logger.warning("MCTS_num_processes=%s is less than 1; no evaluation was run.",
                       args.MCTS_num_processes)
        return None

    # Promote the current net only if it clears the 55% threshold.
    return iteration_2 if wins_ratio >= 0.55 else iteration_1
Ejemplo n.º 4
0
            dataset.append('Human as white wins')
            print("YOU WIN!!!!!!!!!!!")
        else:
            dataset.append('AI as white wins')
            print("YOU LOSE!!!!!!!")
        return "white", dataset
    else:
        dataset.append("Nobody wins")
        print("DRAW!!!!!")
        return None, dataset

if __name__ == "__main__":
    # Load the trained network the human will play against.
    best_net = "c4_current_net_trained1_iter6.pth.tar"
    best_net_filename = os.path.join("./model_data/", best_net)
    best_cnet = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        best_cnet.cuda()
    best_cnet.eval()
    checkpoint = torch.load(best_net_filename)
    best_cnet.load_state_dict(checkpoint['state_dict'])

    # Keep playing games until the user answers "n".
    prompt = "Do you wanna play again? (Y/N)\n"
    play_again = True
    while play_again:
        play_game(best_cnet)
        # Re-ask until the answer is y or n (case-insensitive).
        again = input(prompt)
        while again.lower() not in ("y", "n"):
            again = input(prompt)
        play_again = again.lower() != "n"
                
                                    current_net)
    best_net_filename = os.path.join("./model_data/",\
                                    best_net)
    current_cnet = ConnectNet()
    best_cnet = ConnectNet()
    cuda = torch.cuda.is_available()
    if cuda:
        current_cnet.cuda()
        best_cnet.cuda()

    if multiprocessing == 1:
        mp.set_start_method("spawn", force=True)

        current_cnet.share_memory()
        best_cnet.share_memory()
        current_cnet.eval()
        best_cnet.eval()

        checkpoint = torch.load(current_net_filename)
        current_cnet.load_state_dict(checkpoint['state_dict'])
        checkpoint = torch.load(best_net_filename)
        best_cnet.load_state_dict(checkpoint['state_dict'])

        processes = []
        for i in range(6):
            p = mp.Process(target=fork_process,
                           args=(arena(current_cnet, best_cnet), 10, i))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
Ejemplo n.º 6
0
            "dataset_cpu%i_%i_%s" %
            (cpu, idxx, datetime.datetime.today().strftime("%Y-%m-%d")),
            dataset_p)


# Script entry point: load a fixed checkpoint and run MCTS_self_play in
# several worker processes.
if __name__ == "__main__":
    # Hard-coded switch; only the multiprocessing == 1 path is visible here.
    multiprocessing = 1
    if multiprocessing == 1:
        net_to_play = "c4_current_net_trained2_iter5.pth.tar"
        # Use the "spawn" start method, overriding any previously set one.
        mp.set_start_method("spawn", force=True)
        net = ConnectNet()
        cuda = torch.cuda.is_available()
        if cuda:
            net.cuda()
        # Move the parameters to shared memory before handing the net to the
        # worker processes; eval() switches the module to inference mode.
        net.share_memory()
        net.eval()
        print("hi")
        # torch.save({'state_dict': net.state_dict()}, os.path.join("./model_data/",\
        #                                "c4_current_net.pth.tar"))

        # Load the checkpoint weights; the file must already exist, there is
        # no fallback if it is missing.
        current_net_filename = os.path.join("./model_data/",\
                                        net_to_play)
        checkpoint = torch.load(current_net_filename)
        net.load_state_dict(checkpoint['state_dict'])
        processes = []
        # Start 6 workers; each gets the net, the constant 25 and its index.
        # NOTE(review): 25 is presumably the number of games per worker —
        # confirm against the MCTS_self_play signature.
        for i in range(6):
            p = mp.Process(target=MCTS_self_play, args=(net, 25, i))
            p.start()
            processes.append(p)
        # Wait for every worker to finish.
        for p in processes:
            p.join()