Python TicTacToeGameState 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: drl_gym.environments.tictactoe

클래스/타입: TicTacToeGameState

hotexamples.com에서의 예제들: 6

Python TicTacToeGameState 예제 6개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 drl_gym.environments.tictactoe.TicTacToeGameState의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

get_action_space_size(3)

TicTacToeGameState(1)

clone(1)

get_vectorized_state(1)

예제 #1

파일 보기

from drl_gym.agents import CommandLineAgent, RandomAgent
from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_to_the_end

if __name__ == "__main__":
    # Run one tic-tac-toe game to completion between a CommandLineAgent
    # (first in the agent list) and a RandomAgent (second), printing the
    # game state before and after the run.
    state = TicTacToeGameState()
    players = [CommandLineAgent(), RandomAgent()]

    print(state)
    run_to_the_end(players, state)
    print(state)

예제 #2

파일 보기

from drl_gym.agents import RandomAgent, HalfAlphaZeroAgent
from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_for_n_games_and_print_stats

if __name__ == "__main__":

    import tensorflow as tf

    # Switch TensorFlow to graph (non-eager) mode before any agent is built.
    tf.compat.v1.disable_eager_execution()

    state = TicTacToeGameState()
    learner = HalfAlphaZeroAgent(
        10, state.get_action_space_size(), keep_memory=True
    )
    baseline = RandomAgent()

    # 1000 calls to the stats runner, each given 100 as its count argument
    # (presumably the number of games per call).
    batch = 0
    while batch < 1000:
        # Build a fresh agent list for every call: with shuffle_players=True
        # the runner may reorder the list it receives.
        run_for_n_games_and_print_stats(
            [learner, baseline], state, 100, shuffle_players=True
        )
        batch += 1

예제 #3

파일 보기

from drl_gym.agents import TabQLearningAgent, CommandLineAgent
from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_for_n_games_and_print_stats, run_step

if __name__ == "__main__":
    # Pit two tabular Q-learning agents against each other, run a final
    # batch with epsilon set to -1.0, then play two step-by-step games
    # against a command-line agent.
    gs = TicTacToeGameState()
    learner_a = TabQLearningAgent()
    learner_b = TabQLearningAgent()

    # Same hyper-parameters for both learners.
    for learner in (learner_a, learner_b):
        learner.alpha = 0.1
        learner.epsilon = 0.005

    # 100 calls to the stats runner, each given 5000 as its count argument.
    for _ in range(100):
        run_for_n_games_and_print_stats([learner_a, learner_b], gs, 5000)

    # Negative epsilon for the final batch (presumably disables exploration).
    for learner in (learner_a, learner_b):
        learner.epsilon = -1.0
    run_for_n_games_and_print_stats([learner_a, learner_b], gs, 100)

    # Game 1: learner_a listed first, command-line agent second.
    # Game 2: command-line agent listed first, learner_b second.
    for cli_first, trained in ((False, learner_a), (True, learner_b)):
        board = gs.clone()
        while not board.is_game_over():
            # A new CommandLineAgent is created for every step.
            lineup = [trained, CommandLineAgent()]
            if cli_first:
                lineup.reverse()
            run_step(lineup, board)
            print(board)

예제 #4

파일 보기

from drl_gym.agents import CommandLineAgent, PPOAgent, RandomAgent
from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_for_n_games_and_print_stats, run_step

if __name__ == "__main__":
    # Run a PPO agent against a random agent, then play two step-by-step
    # games involving a command-line agent.
    gs = TicTacToeGameState()

    # Size the PPO agent from the environment: length of the vectorized
    # state and the size of the action space.
    n_state_features = gs.get_vectorized_state().shape[0]
    n_actions = gs.get_action_space_size()
    ppo_player = PPOAgent(
        state_space_size=n_state_features,
        action_space_size=n_actions,
    )
    random_player = RandomAgent()

    # 100 calls to the stats runner with 5000 as the count argument,
    # followed by one more call with 100.
    for _ in range(100):
        run_for_n_games_and_print_stats([ppo_player, random_player], gs, 5000)

    run_for_n_games_and_print_stats([ppo_player, random_player], gs, 100)

    # First game: PPO agent listed first, command-line agent second.
    board = gs.clone()
    while True:
        if board.is_game_over():
            break
        run_step([ppo_player, CommandLineAgent()], board)
        print(board)

    # Second game: command-line agent listed first, random agent second.
    board = gs.clone()
    while True:
        if board.is_game_over():
            break
        run_step([CommandLineAgent(), random_player], board)
        print(board)

예제 #5

파일 보기

from drl_gym.agents import CommandLineAgent, DeepQLearningAgent
from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_for_n_games_and_print_stats, run_step

if __name__ == "__main__":
    # Pit two deep Q-learning agents against each other, run a final batch
    # with epsilon set to -1.0, then play two step-by-step games against a
    # command-line agent.
    gs = TicTacToeGameState()

    # Each agent is built with its own query of the action-space size.
    dqn_first, dqn_second = [
        DeepQLearningAgent(action_space_size=gs.get_action_space_size())
        for _ in range(2)
    ]

    # Same hyper-parameters for both agents.
    for dqn in (dqn_first, dqn_second):
        dqn.alpha = 0.1
        dqn.epsilon = 0.005

    # 100 calls to the stats runner, each given 5000 as its count argument.
    for _ in range(100):
        run_for_n_games_and_print_stats([dqn_first, dqn_second], gs, 5000)

    # Negative epsilon for the final batch (presumably disables exploration).
    for dqn in (dqn_first, dqn_second):
        dqn.epsilon = -1.0
    run_for_n_games_and_print_stats([dqn_first, dqn_second], gs, 100)

    # Game 1: dqn_first listed first, command-line agent second.
    # Game 2: command-line agent listed first, dqn_second second.
    for cli_first, trained in ((False, dqn_first), (True, dqn_second)):
        board = gs.clone()
        while not board.is_game_over():
            # A new CommandLineAgent is created for every step.
            if cli_first:
                lineup = [CommandLineAgent(), trained]
            else:
                lineup = [trained, CommandLineAgent()]
            run_step(lineup, board)
            print(board)

예제 #6

파일 보기

from drl_gym.agents import RandomAgent, ExpertApprenticeAgent
from drl_gym.environments.tictactoe import TicTacToeGameState
from drl_gym.runners import run_for_n_games_and_print_stats

if __name__ == "__main__":

    import tensorflow as tf

    # Switch TensorFlow to graph (non-eager) mode before any agent is built.
    tf.compat.v1.disable_eager_execution()

    state = TicTacToeGameState()
    n_actions = state.get_action_space_size()
    apprentice = ExpertApprenticeAgent(100, n_actions)
    baseline = RandomAgent()

    # 1000 calls to the stats runner, each given 1000 as its count argument
    # (presumably the number of games per call).
    remaining = 1000
    while remaining > 0:
        run_for_n_games_and_print_stats([apprentice, baseline], state, 1000)
        remaining -= 1