예제 #1
0
def minimax_alpha_beta(moves_log,
                       state: game.GameState,
                       max_player: MinimaxPlayer,
                       max_depth=5,
                       depth=0,
                       alpha=-INF,
                       beta=INF) -> (int, int):
    """Pick a move for ``state`` using depth-limited minimax with alpha-beta pruning.

    Nodes at even ``depth`` maximize the score and nodes at odd ``depth``
    minimize it.
    NOTE(review): this presumes the root call (depth 0) is made when it is
    ``max_player``'s turn -- confirm against the caller.

    Args:
        moves_log: Moves played from the root to reach ``state``. It is only
            extended and handed to the recursive calls (and used by the
            commented-out debug trace below).
        state: Position to search from.
        max_player: Player whose win is scored positively.
        max_depth: Depth at which the search stops and ``state.eval()`` is
            returned instead of recursing.
        depth: Depth of ``state`` below the root.
        alpha: Best score found so far on the path for the maximizing side.
        beta: Best score found so far on the path for the minimizing side.

    Returns:
        A ``(move, score)`` pair. ``move`` is ``NO_MOVE`` for finished games,
        positions without moves and positions at the depth limit; otherwise it
        is chosen at random among the moves that share the best score.
    """
    winner = state.get_winner()
    if winner is not None:
        # Offsetting by depth ranks a win reached in fewer plies above a later
        # one, and a loss reached later above an earlier one.
        score = INF - depth if winner is max_player else -INF + depth
        return NO_MOVE, score  # state.eval_state()

    moves = game.to_non_empty(state.get_moves())
    if not moves:
        return NO_MOVE, TIE_SCORE

    # Check the depth limit before copying/shuffling the moves: at the limit
    # the move list is never used, so shuffling it would be wasted work.
    if depth >= max_depth:
        return NO_MOVE, state.eval()

    moves = list(moves)
    random.shuffle(moves)

    is_max_player = (depth % 2 == 0)
    best_moves = []
    best_score = None
    for move in moves:
        recursive_score = \
            minimax_alpha_beta(moves_log + [move], state.move(move), max_player, max_depth, depth + 1, alpha, beta)[1]
        if is_max_player:
            if best_score is None or recursive_score > best_score:
                # Strictly better: restart the tie list and raise alpha.
                best_score = recursive_score
                best_moves = [move]
                if best_score > alpha:
                    alpha = recursive_score
            elif recursive_score == best_score:
                best_moves.append(move)
        else:
            if best_score is None or recursive_score < best_score:
                # Strictly better for the minimizer: restart the tie list and
                # lower beta.
                best_score = recursive_score
                best_moves = [move]
                if best_score < beta:
                    beta = recursive_score
            elif recursive_score == best_score:
                best_moves.append(move)

        # Prune only once the window has strictly crossed; moves whose score
        # equals the current best are still collected in best_moves.
        if alpha > beta:
            break

    # print(f'Depth {depth}/{max_depth}: [{",".join(str(i) for i in moves_log)}] | '
    #       f'As {state.get_curr_player().get_char()}, '
    #       f'my best moves are: [{",".join(str(i) for i in best_moves)}] with score: {best_score}')

    # best_moves can only be empty if the loop above never ran.
    return random.choice(best_moves) if best_moves else None, best_score
                black_win_prob_history_buffer.append(win_prob)

                print("Black win prob is : ", str(win_prob), " value (-1 ~ 1)")
            else:
                action, win_prob = white_model.get_move(WHITE, Go_Game)
                white_win_prob_history_buffer.append(win_prob)

                print("White win prob is : ", str(win_prob), " value (-1 ~ 1)")

        # - Append move_history
        move_history_history_buffer.append(action)

        # - Act move & get results
        # - Break if game is finished
        if (Go_Game.do_move(action) is True):
            winner, black_go, white_go = Go_Game.get_winner()

            board = np.array(Go_Game.show_result())
            break

        # - Draw game's current state(Board)
        board = np.array(Go_Game.show_result())
        Draw_Plot(board)
        move_count += 1

    # - End of collosseum
    del Go_Game

    # Record History
    if (winner is BLACK):
        Record(
def Game_Collosseum(
        black_model,
        white_model,
        game_count,
        add_sample=True,
        debug_mode=0):  # Debug mode 0 means it will not use debugging
    """Play one game between two models and collect its history.

    On every turn the model of the player to move is asked for an action via
    ``get_move``; the action is applied with ``set_move`` and the board is
    drawn with ``Draw_Plot`` until ``set_move`` reports the game as finished.

    Args:
        black_model: Model asked for moves when the current player is BLACK.
        white_model: Model asked for moves otherwise.
        game_count: Forwarded unchanged to each ``get_move`` call.
        add_sample: When exactly ``True``, the collected inputs and spot
            probabilities are passed to each model's ``add_samples``.
        debug_mode: Forwarded to ``get_move`` as ``debug_mode``.

    Returns:
        The tuple ``(winner, board, move_history_buffer,
        black_win_prob_history_buffer, white_win_prob_history_buffer,
        black_score, white_score)``, or ``None`` if the game was interrupted
        with ``KeyboardInterrupt``.
    """
    print("Enter Collosseum")

    # Original note: board size = 19 (presumably the GameState default -- confirm).
    Go_Game = GameState()

    # History buffers
    white_win_prob_history_buffer = []
    black_win_prob_history_buffer = []
    move_history_buffer = []  # First player is black

    black_inputs_array = []
    white_inputs_array = []

    black_spot_prob = []
    white_spot_prob = []

    try:
        while True:
            current_player = Go_Game.get_current_player()

            # - Calculate proper action
            # - Append prob_history
            # Compare by value: an identity check (`is`) only works when the
            # returned player happens to be the very same object as BLACK.
            if current_player == BLACK:
                action, win_prob, _, board_input, spot_prob = black_model.get_move(
                    BLACK, Go_Game, game_count, debug_mode=debug_mode)
                black_win_prob_history_buffer.append(win_prob)
                black_inputs_array.append(board_input)
                black_spot_prob.append(spot_prob)

                # NOTE(review): win_prob is printed twice here while the white
                # branch prints win_prob and 1 - win_prob -- confirm intent.
                print("Black win prob is : ", str(win_prob), " , ",
                      str(win_prob), " value (-1 ~ 1)")
            else:
                action, win_prob, _, board_input, spot_prob = white_model.get_move(
                    WHITE, Go_Game, game_count, debug_mode=debug_mode)
                white_win_prob_history_buffer.append(win_prob)
                white_inputs_array.append(board_input)
                white_spot_prob.append(spot_prob)

                print("White win prob is : ", str(win_prob), " , ",
                      str(1 - win_prob), " value (-1 ~ 1)")

            # - Append move_history
            move_history_buffer.append(action)

            # - Act move & get results
            # - Leave the loop once the game is finished; this is the only
            #   normal exit, so winner/scores/board are always set afterwards.
            if set_move(Go_Game, action) is True:
                winner, black_score, white_score = Go_Game.get_winner()

                board = np.array(Go_Game.show_result())
                break

            # - Draw game's current state (board)
            board = np.array(Go_Game.show_result())
            Draw_Plot(board)

        # - Add sample
        if add_sample is True:
            black_model.add_samples(black_inputs_array, black_spot_prob)
            white_model.add_samples(white_inputs_array, white_spot_prob)

        # - End of collosseum
        return winner, board, move_history_buffer, black_win_prob_history_buffer, white_win_prob_history_buffer, black_score, white_score

    # Exception (Error handling)
    except KeyboardInterrupt:
        # A user interrupt abandons the game; callers receive None instead of
        # the result tuple.
        return