예제 #1
0
def generate_move(net: Connect4Network, current_board: Board,
                  args: AlphaZeroArgs) -> Column:
    """
    Pick a move for *current_board* with an almost deterministic policy.

    Intended for actual (competitive) play rather than self-play training,
    hence the low MCTS temperature.

    @param net: Neural Network Object
    @param current_board: current board state
    @param args: AlphaZeroArgs
    @return: Column index
    """
    # Refuse to move on a finished game
    if current_board.check_winner() or current_board.is_final():
        raise AssertionError("Game is already over.")

    # Low temperature -> policy mass concentrates on the strongest move
    temperature = 0.1

    # Search the tree from the current position
    search_root = UCT_search(current_board, args.num_reads_mcts, net)

    policy = get_policy(search_root, temperature)
    print(f"Policy: {policy}")

    # Sample one of the seven columns according to the policy
    move = np.random.choice(np.arange(7), p=policy)
    print(f"Playing col: {move}")
    return move
예제 #2
0
def feature1(board: Board, player: Player) -> float:
    """
    Feature 1: detect four connected stones (a win) for *player*.

    :param board: The board to check against
    :param player: The player to check for
    :return: ``np.inf`` when the player has won, ``0.0`` otherwise
    """
    return np.inf if board.check_winner(player) else 0.0
예제 #3
0
    def test_feature1(self):
        # Board layout after the drops below:
        # |==============|
        # |              |
        # |              |
        # |              |
        # |              |
        # |O O O         |
        # |X X X X       |
        # |==============|
        # |0 1 2 3 4 5 6 |
        board = Board()
        # Both players stack the first three columns in alternation ...
        for col in (0, 1, 2):
            board.drop_piece(col, PLAYER_1)
            board.drop_piece(col, PLAYER_2)
        # ... then PLAYER_1 completes four in a row on the bottom rank
        board.drop_piece(3, PLAYER_1)

        self.assertTrue(board.check_winner(PLAYER_1))
        self.assertFalse(board.check_winner(PLAYER_2))

        # A won position is worth +infinity for the winning player
        self.assertEqual(heuristic_1.feature1(board, PLAYER_1), np.inf)
예제 #4
0
    def play_round(self,
                   num_reads: int) -> Tuple[Optional[str], List[np.ndarray]]:
        """
        Play one evaluation match between the current and the best network.

        The starting colour is assigned by a coin flip; both networks then
        alternate moves sampled from a near-greedy MCTS policy until the
        game ends.

        @param num_reads: number of MCTS reads per move (see args)
        @return: winner label ("current" / "best", or None on a draw) and
                 the list of encoded board states seen during the game
        """
        print("Starting game round...")
        # Coin flip decides which network plays white
        if np.random.uniform(0, 1) <= 0.5:
            white, black = self.current, self.best
            w, b = "current", "best"
        else:
            white, black = self.best, self.current
            w, b = "best", "current"

        current_board = Board()
        game_won = False
        dataset = []  # encoded boards collected over the match
        value = 0     # -1: black won, 1: white won, 0: draw
        # small temperature -> near-greedy (exploitation-heavy) selection
        temperature = 0.1

        while not game_won and current_board.is_playable():
            dataset.append(copy.deepcopy(current_board.encode()))
            # Query the network whose turn it is for a move policy
            if current_board.player == PLAYER_1:
                root = UCT_search(current_board, num_reads, white)
                policy = get_policy(root, temperature)
                print("Policy: ", policy, "white = %s" % (str(w)))
            elif current_board.player == PLAYER_2:
                root = UCT_search(current_board, num_reads, black)
                policy = get_policy(root, temperature)
                print("Policy: ", policy, "black = %s" % (str(b)))
            else:
                raise AssertionError("Invalid player.")

            # Sample a column from the policy and play it
            col_choice = np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]),
                                          p=policy)
            current_board.drop_piece(col_choice)
            print(current_board)

            if current_board.check_winner():
                # drop_piece already flipped the player, so the player
                # "to move" here is the one who did NOT make the last move
                if current_board.player == PLAYER_1:  # black wins
                    value = -1
                elif current_board.player == PLAYER_2:  # white wins
                    value = 1
                game_won = True

        # Record the final position as well (one-hot encoded)
        dataset.append(current_board.encode())
        if value == -1:
            dataset.append(f"{b} as black wins")
            return b, dataset
        if value == 1:
            dataset.append(f"{w} as white wins")
            return w, dataset
        dataset.append("Nobody wins")
        return None, dataset
예제 #5
0
def self_play(net: Connect4Network, start_index: int, cpu_index: int,
              num_games: int, args: AlphaZeroArgs, iteration: int):
    """
    Self Play of AlphaZero, generating and saving Datasets for the training
    of the Neural Network.

    NOTE: the integer parameters are annotated with the builtin ``int`` —
    the previous ``np.int`` alias was deprecated in NumPy 1.20 and removed
    in NumPy 1.24, which made this ``def`` fail at import time.

    @param net: Neural Network guiding the MCTS
    @param start_index: Start index of Self Play games
    @param cpu_index: index of the CPU/worker producing these games
    @param num_games: number of games this worker plays
    @param args: AlphaZeroArgs (provides num_reads_mcts and temperature_mcts)
    @param iteration: current Iteration (selects the dataset directory)
    """

    # number of more random moves, before lowering temp
    n_max_moves = 11

    print(f"CPU={cpu_index}: Starting MCTS")
    iteration_dir = f"./datasets/iter_{iteration}"

    # exist_ok=True avoids a check-then-create race when several workers
    # start at the same time and all try to create the directory
    os.makedirs(iteration_dir, exist_ok=True)

    # Play self play games
    for idx in range(start_index, num_games + start_index):
        print(f"Game {idx}")

        current_board = Board()
        game_won = False  # indicates that a game is won

        dataset = []   # [encoded board, policy] pairs, one per move
        states = []    # raw (unencoded) board states
        value = 0      # outcome: -1 black wins, 1 white wins, 0 draw
        move_count = 0

        while not game_won and current_board.is_playable():
            t = 0.1
            # less random further into the game
            if move_count < n_max_moves:
                t = args.temperature_mcts

            # save current board state (encoded and unencoded)
            states.append(current_board.current_board.copy())
            board_state = current_board.encode().copy()

            root = UCT_search(current_board, args.num_reads_mcts, net)

            policy = get_policy(root, t)
            print(f"Game {idx} policy: {policy}")

            col_choice = np.random.choice(np.array([0, 1, 2, 3, 4, 5, 6]),
                                          p=policy)

            current_board.drop_piece(col_choice)  # move piece

            dataset.append([board_state, policy])
            print(f"[Iteration: {iteration}]: Game {idx} CURRENT BOARD:\n",
                  current_board)

            move_count += 1
            if current_board.check_winner():  # if somebody won
                if current_board.player == PLAYER_1:  # black wins
                    print("Black wins")
                    value = -1
                elif current_board.player == PLAYER_2:  # white wins
                    print("White wins")
                    value = 1
                game_won = True

        dataset_p = []

        # BUGFIX: this loop previously reused `idx` and thereby shadowed the
        # outer game index, so the saved filename carried the position of
        # the last move instead of the game number.
        for i, data in enumerate(dataset):
            s, p = data
            if i == 0:
                # the very first position gets a neutral value label
                dataset_p.append([s, p, 0])
            else:
                dataset_p.append([s, p, value])

        # Save the dataset, keyed by iteration, worker, game index and date
        time_string = datetime.datetime.today().strftime("%Y-%m-%d")
        pickle_file = f"iter_{iteration}/dataset_iter{iteration}_cpu{cpu_index}_{idx}_{time_string}"
        util.pickle_save(pickle_file, dataset_p)