Ejemplo n.º 1
0
 def play_round(self):
     """Play one game between ``self.current`` and ``self.best`` and record it.

     A coin flip decides which of the two networks is used when
     ``player == 0`` ("white"); the other one is used when ``player == 1``
     ("black"). Every move is sampled from the policy obtained from a
     777-read ``UCT_search`` with ``t = 0.1``.

     Returns:
         tuple: ``(winner, dataset)``. ``winner`` is ``"current"``,
         ``"best"`` or ``None`` when nobody wins. ``dataset`` holds the
         encoded board before every move, then the encoded final board,
         then a closing result string.
     """
     logger.info("Starting game round...")

     # Randomly assign colours so neither network always moves first.
     if np.random.uniform(0, 1) <= 0.5:
         white, black = self.current, self.best
         w, b = "current", "best"
     else:
         white, black = self.best, self.current
         w, b = "best", "current"

     board = cboard()
     dataset = []
     value = 0
     t = 0.1
     columns = np.array([0, 1, 2, 3, 4, 5, 6])  # the seven selectable actions

     # Keep playing until somebody wins or no actions remain.
     while board.actions() != []:
         dataset.append(copy.deepcopy(ed.encode_board(board)))
         print("")
         print(board.current_board)

         if board.player == 0:
             root = UCT_search(board, 777, white, t)
             policy = get_policy(root, t)
             print("Policy: ", policy, "white = %s" % (str(w)))
         elif board.player == 1:
             root = UCT_search(board, 777, black, t)
             policy = get_policy(root, t)
             print("Policy: ", policy, "black = %s" % (str(b)))

         # Sample an action from the search policy and apply it.
         chosen = np.random.choice(columns, p=policy)
         board = do_decode_n_move_pieces(board, chosen)

         if board.check_winner() == True:  # someone wins
             if board.player == 0:  # black wins
                 value = -1
             elif board.player == 1:  # white wins
                 value = 1
             break

     dataset.append(ed.encode_board(board))

     if value == -1:
         dataset.append(f"{b} as black wins")
         return b, dataset
     if value == 1:
         dataset.append(f"{w} as white wins")
         return w, dataset
     dataset.append("Nobody wins")
     return None, dataset
Ejemplo n.º 2
0
def MCTS_self_play(connectnet, num_games, start_idx, cpu, args, iteration):
    """Generate self-play games with MCTS and pickle one dataset per game.

    For every game index in ``[start_idx, start_idx + num_games)`` a fresh
    board is played to the end. Each move is sampled from the policy of a
    777-read ``UCT_search`` guided by ``connectnet``. The first 11 moves use
    ``args.temperature_MCTS`` as ``t``; later moves use ``0.1``.

    Args:
        connectnet: Network handed to ``UCT_search``.
        num_games: Number of games to play.
        start_idx: Index of the first game (used in logs and file names).
        cpu: Worker identifier (used in logs and file names).
        args: Object exposing ``temperature_MCTS``.
        iteration: Training iteration; selects the ``iter_<n>`` directory.

    Side effects:
        Creates ``datasets/iter_<iteration>`` if needed and calls
        ``save_as_pickle`` once per game with a list of
        ``[encoded_state, policy, value]`` samples. The first sample of each
        game is labelled with value 0; all others carry the final game value
        (1 white wins, -1 black wins, 0 otherwise).
    """
    logger.info("[CPU: %d]: Starting MCTS self-play..." % cpu)

    # Several workers may start at the same moment; exist_ok avoids the
    # FileExistsError race of a separate isdir() check followed by mkdir().
    os.makedirs("datasets/iter_%d" % iteration, exist_ok=True)

    columns = np.array([0, 1, 2, 3, 4, 5, 6])  # the seven selectable actions

    for idxx in tqdm(range(start_idx, num_games + start_idx)):
        logger.info("[CPU: %d]: Game %d" % (cpu, idxx))
        current_board = c_board()
        dataset = []  # (state, policy) pairs for neural network training
        value = 0
        move_count = 0
        while current_board.actions() != []:
            t = args.temperature_MCTS if move_count < 11 else 0.1
            board_state = copy.deepcopy(ed.encode_board(current_board))
            root = UCT_search(current_board, 777, connectnet, t)
            policy = get_policy(root, t)
            print("[CPU: %d]: Game %d POLICY:\n " % (cpu, idxx), policy)
            # Sample a move from the search policy and apply it.
            current_board = do_decode_n_move_pieces(
                current_board, np.random.choice(columns, p=policy))
            dataset.append([board_state, policy])
            print(
                "[Iteration: %d CPU: %d]: Game %d CURRENT BOARD:\n" %
                (iteration, cpu, idxx), current_board.current_board,
                current_board.player)
            print(" ")
            if current_board.check_winner() == True:  # if somebody won
                if current_board.player == 0:  # black wins
                    value = -1
                elif current_board.player == 1:  # white wins
                    value = 1
                break
            move_count += 1

        # Attach the game outcome to every sample except the very first one,
        # which is always labelled 0.
        dataset_p = [[s, p, 0 if idx == 0 else value]
                     for idx, (s, p) in enumerate(dataset)]
        save_as_pickle(
            "iter_%d/" % iteration +
            "dataset_iter%d_cpu%i_%i_%s" %
            (iteration, cpu, idxx,
             datetime.datetime.today().strftime("%Y-%m-%d")),
            dataset_p)
Ejemplo n.º 3
0
def UCT_search(game_state, num_reads, net):
    """Run ``num_reads`` UCT simulations from ``game_state``.

    Each simulation selects a leaf from the root, evaluates the leaf's
    position with ``net`` and backs the value estimate up the tree. Leaves
    that are checkmate are backed up without being expanded.

    Args:
        game_state: Position to search from.
        num_reads: Number of simulations to run.
        net: Callable returning ``(child_priors, value_estimate)`` tensors
            for an encoded board tensor.

    Returns:
        tuple: ``(best_move, root)`` where ``best_move`` is the argmax of
        ``root.child_number_visits`` and ``root`` is the searched root node.
    """
    root = UCTNode(game_state, move=None, parent=DummyNode())
    # Only move inputs to the GPU when one exists, so the search also runs
    # on CPU-only hosts instead of raising from an unconditional .cuda().
    use_cuda = torch.cuda.is_available()
    for _ in range(num_reads):
        leaf = root.select_leaf()
        encoded_s = ed.encode_board(leaf.game)
        encoded_s = encoded_s.transpose(2, 0, 1)
        encoded_s = torch.from_numpy(encoded_s).float()
        if use_cuda:
            encoded_s = encoded_s.cuda()
        # Pure inference: skip autograd bookkeeping for every simulation.
        with torch.no_grad():
            child_priors, value_estimate = net(encoded_s)
        child_priors = child_priors.detach().cpu().numpy().reshape(-1)
        value_estimate = value_estimate.item()
        if (leaf.game.check_status() == True
                and leaf.game.in_check_possible_moves() == []):  # if checkmate
            leaf.backup(value_estimate)
            continue
        leaf.expand(child_priors)  # need to make sure valid moves
        leaf.backup(value_estimate)
    return np.argmax(root.child_number_visits), root
Ejemplo n.º 4
0
 def play_round(self):
     """Play one game between ``self.current`` and ``self.best``.

     A coin flip decides which network is used when ``player == 0``
     ("white") and which when ``player == 1`` ("black"). Moves are the best
     move returned by a 777-read ``UCT_search``. The game stops on
     checkmate, once ``move_count`` exceeds 100, or when the current
     position has already been recorded three times.

     Returns:
         tuple: ``(winner, dataset)``. ``winner`` is ``"current"``,
         ``"best"`` or ``None``. ``dataset`` holds the encoded board before
         each move, followed by the final value (1 white wins, -1 black
         wins, 0 otherwise).
     """
     if np.random.uniform(0, 1) <= 0.5:
         white, black = self.current, self.best
         w, b = "current", "best"
     else:
         white, black = self.best, self.current
         w, b = "best", "current"

     board = c_board()
     seen_positions = []
     dataset = []
     value = 0

     while board.move_count <= 100:
         # Count earlier occurrences of the current position.
         repeats = sum(
             1 for s in seen_positions
             if np.array_equal(board.current_board, s))
         if repeats == 3:  # draw by repetition
             break

         seen_positions.append(copy.deepcopy(board.current_board))
         dataset.append(copy.deepcopy(ed.encode_board(board)))

         if board.player == 0:
             best_move, _ = UCT_search(board, 777, white)
         elif board.player == 1:
             best_move, _ = UCT_search(board, 777, black)

         # decode move and move piece(s)
         board = do_decode_n_move_pieces(board, best_move)
         print(board.current_board, board.move_count)
         print(" ")

         in_check = board.check_status() == True
         if in_check and board.in_check_possible_moves() == []:  # checkmate
             if board.player == 0:  # black wins
                 value = -1
             elif board.player == 1:  # white wins
                 value = 1
             break

     dataset.append(value)
     if value == -1:
         return b, dataset
     if value == 1:
         return w, dataset
     return None, dataset
Ejemplo n.º 5
0
def UCT_search(game_state, num_reads, net, temp):
    """Run ``num_reads`` UCT simulations from ``game_state`` and return the root.

    Each simulation selects a leaf from the root, evaluates the leaf's
    position with ``net`` and backs the value estimate up the tree. Leaves
    where somebody has won or no actions remain are backed up without being
    expanded.

    Args:
        game_state: Position to search from.
        num_reads: Number of simulations to run.
        net: Callable returning ``(child_priors, value_estimate)`` tensors
            for an encoded board tensor.
        temp: Not used by this function; kept so existing callers that pass
            it keep working.

    Returns:
        The root node after all simulations.
    """
    root = UCTNode(game_state, move=None, parent=DummyNode())
    # Only move inputs to the GPU when one exists, so the search also runs
    # on CPU-only hosts instead of raising from an unconditional .cuda().
    use_cuda = torch.cuda.is_available()
    for _ in range(num_reads):
        leaf = root.select_leaf()
        encoded_s = ed.encode_board(leaf.game)
        encoded_s = encoded_s.transpose(2, 0, 1)
        encoded_s = torch.from_numpy(encoded_s).float()
        if use_cuda:
            encoded_s = encoded_s.cuda()
        # Pure inference: skip autograd bookkeeping for every simulation.
        with torch.no_grad():
            child_priors, value_estimate = net(encoded_s)
        child_priors = child_priors.detach().cpu().numpy().reshape(-1)
        value_estimate = value_estimate.item()
        if (leaf.game.check_winner() == True
                or leaf.game.actions() == []):  # if somebody won or draw
            leaf.backup(value_estimate)
            continue
        leaf.expand(child_priors)  # need to make sure valid moves
        leaf.backup(value_estimate)
    return root
Ejemplo n.º 6
0
def MCTS_self_play(chessnet, num_games, cpu):
    """Play ``num_games`` self-play games and pickle one dataset per game.

    Each move is the best move of a 777-read ``UCT_search`` guided by
    ``chessnet``. A game stops on checkmate, once ``move_count`` exceeds 100,
    or when the current position has already been recorded three times.

    Args:
        chessnet: Network handed to ``UCT_search``.
        num_games: Number of games to play.
        cpu: Worker identifier, used in the output file name.

    Side effects:
        Calls ``save_as_pickle`` once per game with a list of
        ``[encoded_state, policy, value]`` samples. The first sample is
        always labelled 0; the rest carry the final game value.
    """
    for idxx in range(num_games):
        board = c_board()
        dataset = []  # (state, policy) pairs for neural network training
        seen_positions = []
        value = 0

        while board.move_count <= 100:
            # Count earlier occurrences of the current position.
            repeats = sum(
                1 for s in seen_positions
                if np.array_equal(board.current_board, s))
            if repeats == 3:  # draw by repetition
                break

            seen_positions.append(copy.deepcopy(board.current_board))
            board_state = copy.deepcopy(ed.encode_board(board))
            best_move, root = UCT_search(board, 777, chessnet)
            # decode move and move piece(s)
            board = do_decode_n_move_pieces(board, best_move)
            dataset.append([board_state, get_policy(root)])
            print(board.current_board, board.move_count)
            print(" ")

            in_check = board.check_status() == True
            if in_check and board.in_check_possible_moves() == []:  # checkmate
                if board.player == 0:  # black wins
                    value = -1
                elif board.player == 1:  # white wins
                    value = 1
                break

        # Label every sample with the outcome, except the first (always 0).
        dataset_p = [[s, p, 0 if idx == 0 else value]
                     for idx, (s, p) in enumerate(dataset)]
        del dataset
        stamp = datetime.datetime.today().strftime("%Y-%m-%d")
        save_as_pickle("dataset_cpu%i_%i_%s" % (cpu, idxx, stamp), dataset_p)
Ejemplo n.º 7
0
def _ask_human_policy():
    """Prompt until the human enters a column in 1-7; return a one-hot policy."""
    while True:
        col = input(
            "Which column do you wanna drop your piece? (Enter 1-7)\n"
        )
        try:
            choice = int(col)
        except ValueError:
            # Non-numeric input used to crash the game; ask again instead.
            continue
        if 1 <= choice <= 7:
            policy = np.zeros([7], dtype=np.float32)
            policy[choice - 1] += 1
            return policy


def play_game(net):
    """Play one interactive game between a human and ``net``.

    The human picks "O" (moves first, ``player == 0``) or "X" (moves second,
    ``player == 1``); ``net`` takes the other side. AI moves are sampled from
    the policy of a 777-read ``UCT_search`` (``t = 1`` for the first six
    moves of the game, ``0.1`` afterwards). Human moves are read from stdin
    and turned into a one-hot policy.

    Args:
        net: Network handed to ``UCT_search`` for the AI's moves.

    Returns:
        tuple: ``(winner, dataset)``. ``winner`` is ``"white"``, ``"black"``
        or ``None`` when nobody wins. ``dataset`` holds the encoded board
        before every move, then the encoded final board, then a closing
        result string.
    """
    # Ask the human which side to play; the network gets the other one.
    white = None
    black = None
    while True:
        play_as = input(
            "What do you wanna play as? (\"O\"/\"X\")? Note: \"O\" starts first, \"X\" starts second\n"
        )
        if play_as == "O":
            black = net
            break
        elif play_as == "X":
            white = net
            break
        else:
            print("I didn't get that.")

    current_board = cboard()
    dataset = []
    value = 0
    moves_count = 0
    columns = np.array([0, 1, 2, 3, 4, 5, 6])  # the seven selectable actions

    while current_board.actions() != []:
        t = 1 if moves_count <= 5 else 0.1
        moves_count += 1
        dataset.append(copy.deepcopy(ed.encode_board(current_board)))
        print(current_board.current_board)
        print(" ")

        if current_board.player == 0:
            if white is not None:
                print("AI is thinking........")
                root = UCT_search(current_board, 777, white, t)
                policy = get_policy(root, t)
            else:
                policy = _ask_human_policy()
        elif current_board.player == 1:
            if black is not None:
                print("AI is thinking.............")
                root = UCT_search(current_board, 777, black, t)
                policy = get_policy(root, t)
            else:
                policy = _ask_human_policy()

        # decode move and move piece(s)
        current_board = do_decode_n_move_pieces(
            current_board, np.random.choice(columns, p=policy))

        if current_board.check_winner() == True:  # someone wins
            if current_board.player == 0:  # black wins
                value = -1
            elif current_board.player == 1:  # white wins
                value = 1
            break

    dataset.append(ed.encode_board(current_board))
    print(current_board.current_board)
    print(" ")

    if value == -1:
        if play_as == "O":
            dataset.append("AI as black wins")
            print("YOU LOSE!!!!!!!")
        else:
            dataset.append("Human as black wins")
            print("YOU WIN!!!!!!!")
        return "black", dataset
    if value == 1:
        if play_as == "O":
            dataset.append("Human as white wins")
            print("YOU WIN!!!!!!!!!!!")
        else:
            dataset.append("AI as white wins")
            print("YOU LOSE!!!!!!!")
        return "white", dataset
    dataset.append("Nobody wins")
    print("DRAW!!!!!")
    return None, dataset