Example #1
def MCTS_self_play(connectnet, num_games, start_idx, cpu, args, iteration):
    logger.info("[CPU: %d]: Starting MCTS self-play..." % cpu)

    if not os.path.isdir("./datasets/iter_%d" % iteration):
        if not os.path.isdir("datasets"):
            os.mkdir("datasets")
        os.mkdir("datasets/iter_%d" % iteration)

    for idxx in tqdm(range(start_idx, num_games + start_idx)):
        logger.info("[CPU: %d]: Game %d" % (cpu, idxx))
        current_board = c_board()
        checkmate = False
        dataset = []  # to get state, policy, value for neural network training
        states = []
        value = 0
        move_count = 0
        while not checkmate and current_board.actions() != []:
            t = args.temperature_MCTS
            '''
            if move_count < 11:
                t = args.temperature_MCTS
            else:
                t = 0.1
            '''
            states.append(copy.deepcopy(current_board.current_board))
            board_state = copy.deepcopy(ed.encode_board(current_board))
            root = UCT_search(current_board, 777, connectnet, t)
            policy = get_policy(root, t)
            print("[CPU: %d]: Game %d POLICY:\n " % (cpu, idxx), policy)
            current_board = do_decode_n_move_pieces(current_board,\
                                                    np.random.choice(np.array([0,1,2,3,4,5,6,7,8]), \
                                                                     p = policy)) # decode move and move piece(s)
            dataset.append([board_state, policy])
            print(
                "[Iteration: %d CPU: %d]: Game %d CURRENT BOARD:\n" %
                (iteration, cpu, idxx), current_board.current_board,
                current_board.player)
            print(" ")
            if current_board.check_winner():  # if somebody won
                if current_board.player == 0:  # black wins
                    value = -1
                elif current_board.player == 1:  # white wins
                    value = 1
                checkmate = True
            move_count += 1
        dataset_p = []
        for idx, data in enumerate(dataset):
            s, p = data
            if idx == 0:
                dataset_p.append([s, p, 0])
            else:
                dataset_p.append([s, p, value])
        del dataset
        save_as_pickle("iter_%d/" % iteration +\
                       "dataset_iter%d_cpu%i_%i_%s" % (iteration, cpu, idxx, datetime.datetime.today().strftime("%Y-%m-%d")), dataset_p)
Example #2
def MCTS_self_play(connectnet, num_games, cpu):
    # We want to iterate over num_games games.
    for idxx in range(0, num_games):
        # Set initial variables.
        current_board = c_board()
        checkmate = False
        dataset = []  # to get state, policy, value for neural network training
        states = []
        value = 0
        move_count = 0
        # Keep playing as long as the game is unfinished.
        while not checkmate and current_board.actions() != []:
            # If we are in the first 11 moves, we set the temperature to 1 (meaning more exploration), otherwise we set
            # it to 0.1.
            if move_count < 11:
                t = 1
            else:
                t = 0.1
            # Explore the current state up to 777 times. The exploration policy is determined by the output of the
            # network. See comment at the top of UCT_search function for more info.
            states.append(copy.deepcopy(current_board.current_board))
            board_state = copy.deepcopy(ed.encode_board(current_board))
            root = UCT_search(current_board, 777, connectnet, t)
            policy = get_policy(root, t)
            print(policy)
            current_board = do_decode_n_move_pieces(current_board,\
                                                    np.random.choice(np.array([0,1,2,3,4,5,6]), \
                                                                     p = policy)) # decode move and move piece(s)
            # Record the dataset for future deep learning training. And check if the game is over.
            dataset.append([board_state, policy])
            print(current_board.current_board, current_board.player)
            print(" ")
            if current_board.check_winner():  # if somebody won
                if current_board.player == 0:  # black wins
                    value = -1
                elif current_board.player == 1:  # white wins
                    value = 1
                checkmate = True
            move_count += 1
            # End while loop

        dataset_p = []
        for idx, data in enumerate(dataset):
            state, policy = data
            if idx == 0:
                dataset_p.append([state, policy, 0])
            else:
                dataset_p.append([state, policy, value])
        del dataset
        save_as_pickle(
            "dataset_cpu%i_%i_%s" %
            (cpu, idxx, datetime.datetime.today().strftime("%Y-%m-%d")),
            dataset_p)
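Example #2 writes each finished game to disk with save_as_pickle. A plausible minimal implementation, assuming games are stored under a ./datasets/ directory (path and behavior assumed, not shown in the listings):

import os
import pickle

def save_as_pickle(filename, data):
    # Assumed helper: serialize the finished game's [state, policy, value] list
    # under ./datasets/ so the training stage can load it later.
    complete_name = os.path.join("./datasets/", filename)
    with open(complete_name, "wb") as output:
        pickle.dump(data, output)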
Example #3
def MCTS_self_play(connectnet, num_games, start_idx, cpu, args, iteration):
    """
    Play with itself using the NN with MCTS.
    :param connectnet: pytorch model
    :param num_games:
    :param start_idx:
    :param cpu:
    :param args:
    :param iteration:
    :return:
    """
    logger.info("[CPU: %d]: Starting MCTS self-play..." % cpu)
    
    if not os.path.isdir("./datasets/iter_%d" % iteration):
        if not os.path.isdir("datasets"):
            os.mkdir("datasets")
        os.mkdir("datasets/iter_%d" % iteration)
        
    for idxx in tqdm(range(start_idx, num_games + start_idx)):
        logger.info("[CPU: %d]: Game %d" % (cpu, idxx))
        current_board = c_board()
        checkmate = False
        dataset = [] # to get state, policy, value for neural network training
        states = []
        value = 0
        move_count = 0
        # when the game already produced an outcome, or there is no more room for play, stop
        while not checkmate and current_board.actions() != []:
            if move_count < 11:
                t = args.temperature_MCTS
            else:
                t = 0.1
            states.append(copy.deepcopy(current_board.current_board))
            board_state = copy.deepcopy(ed.encode_board(current_board))
            root = UCT_search(current_board,777,connectnet,t)
            policy = get_policy(root, t)
            print("[CPU: %d]: Game %d POLICY:\n " % (cpu, idxx), policy)
            current_board = do_decode_n_move_pieces(current_board,\
                                                    np.random.choice(np.array([0,1,2,3,4,5,6]), \
                                                    p = policy)) # decode move and move piece(s)
            dataset.append([board_state,policy])
            print("[Iteration: %d CPU: %d]: Game %d CURRENT BOARD:\n" % \
                  (iteration, cpu, idxx), current_board.current_board,current_board.player)
            print(" ")
            if current_board.check_winner(): # if somebody won
                if current_board.player == 0: # black wins
                    value = -1
                elif current_board.player == 1: # white wins
                    value = 1
                checkmate = True
            move_count += 1
        dataset_p = []
        for idx, data in enumerate(dataset):
            s, p = data  # s, p are the encoded board state and the MCTS policy
            if idx == 0:  # the first (empty-board) position is given value 0
                dataset_p.append([s, p, 0])
            else:  # every other position gets the final game outcome (-1 or 1, the same for all)
                dataset_p.append([s, p, value])
        # only the positions actually played are stored, not every node explored
        # during the MC simulations
        del dataset
        # dataset_p is pickled so the training stage can later load these
        # (state, policy, value) examples
        save_as_pickle("iter_%d/" % iteration +
                       "dataset_iter%d_cpu%i_%i_%s" %
                       (iteration, cpu, idxx,
                        datetime.datetime.today().strftime("%Y-%m-%d")), dataset_p)
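Since this version takes a cpu id and a start_idx, it is presumably meant to be run by several worker processes at once. A hypothetical driver (function name, process count, and games-per-process are assumptions, not taken from the source) could look like:

import torch.multiprocessing as mp

def run_self_play(connectnet, args, iteration, num_processes=4, games_per_process=25):
    # Hypothetical launcher: one self-play worker per process, each with a
    # distinct start_idx so the pickled file names never collide.
    connectnet.share_memory()  # share the model weights across worker processes
    processes = []
    for cpu in range(num_processes):
        p = mp.Process(target=MCTS_self_play,
                       args=(connectnet, games_per_process,
                             cpu * games_per_process, cpu, args, iteration))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()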