Example #1
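A minimal interactive run: the CommandLineAgent (presumably driven by keyboard input) plays one GridWorldGameState game to completion, and the state is printed before and after the game.
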
from agents import CommandLineAgent
from environments import GridWorldGameState
from runners import run_to_the_end

if __name__ == "__main__":
    gs = GridWorldGameState()
    agent = CommandLineAgent()

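    # Show the initial state, play one game to the end, then show the final state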
    print(gs)
    run_to_the_end([agent], gs)
    print(gs)
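
Example #2

Tabular-style deep Q-learning on the same grid world: the agent trains for 500 rounds of 100 games, exploration is then switched off for a final evaluation round, and one game is replayed step by step to show the learned behaviour.
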
from agents import TabularLikeDeepQLearningAgent
from environments import GridWorldGameState
from runners import run_for_n_games_and_print_stats, run_step

if __name__ == "__main__":
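    # The Q-learning agent is built with four actions, presumably the grid world's up/down/left/right moves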
    gs = GridWorldGameState()
    agent = TabularLikeDeepQLearningAgent(action_space_size=4)

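    # Train for 500 rounds of 100 games each; statistics are printed after every round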
    for i in range(500):
        run_for_n_games_and_print_stats([agent], gs, 100)

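    # A negative epsilon should disable epsilon-greedy exploration (assumption about how the agent uses epsilon),
    # so the next 100 games evaluate the purely greedy policy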
    agent.epsilon = -1.0
    run_for_n_games_and_print_stats([agent], gs, 100)

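    # Replay one game step by step on a copy of the state, printing it after every step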
    gs = gs.clone()
    while not gs.is_game_over():
        run_step([agent], gs)
        print(gs)
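
Example #3

Proximal Policy Optimization (PPO) on the grid world: for 20 rounds the agent trains on 300 games and then plays one printed demonstration game, resetting the environment between rounds; once training is finished, a final game is run to the end.
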
from agents import PPOAgent
from environments import GridWorldGameState
from runners import run_to_the_end, run_for_n_games_and_print_stats, run_step
import tensorflow as tf

if __name__ == "__main__":
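    # Eager execution is disabled here, presumably because PPOAgent builds a TF1-style computation graph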
    tf.compat.v1.disable_eager_execution()
    gs = GridWorldGameState()
    agent = PPOAgent(state_space_size=gs.get_vectorized_state().shape[0],
                     action_space_size=gs.get_action_space_size())
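    # Alternate between a training round of 300 games and one printed demonstration game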
    for i in range(20):
        print(i)
        run_for_n_games_and_print_stats([agent], gs, 300)
        while not gs.is_game_over():
            run_step([agent], gs)
            print(gs)

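        # Reset the environment in place before the next round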
        gs.__init__()
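    # After the last round: print the fresh state, play one final game to the end, and print the result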
    print(gs)
    run_to_the_end([agent], gs)
    print(gs)