def play_chomp(mode):
    """Start Chomp game and training.

    Args:
        mode: One of 'train' (train an agent and plot/save stats),
            'hyper' (grid-search epsilon/learning-rate), or
            'demo' (load a saved Q-table and play).
    """
    print('=====CHOMP=====')
    # Square board has optimal strategy to allow for easy sanity check
    # that agent is learning.
    game = Chomp(rows=4, cols=4)
    if mode == 'train':
        # Train agent to go first
        agent = Agent(game, epsilon=9e-3, learning_rate=25e-2)
        n = 10000
        history = agent.train(n)
        print('After {} Episodes'.format(n))
        # Plot Reward Stats: same series over three episode windows
        # (first 100, first 1000, all n) to show convergence at each scale.
        rfig, raxs = plt.subplots(nrows=3, ncols=1)
        for ax in raxs:
            ax.grid()
        raxs[0].plot(history[0][:100], history[1][:100])
        raxs[0].set(ylabel='Cumulative Reward',
                    title='Chomp 4x4 Cumulative Reward')
        raxs[1].plot(history[0][:1000], history[1][:1000], color='g')
        raxs[1].set(ylabel='Cumulative Reward')
        raxs[2].plot(history[0][:n], history[1][:n], color='r')
        raxs[2].set(xlabel='Episode', ylabel='Cumulative Reward')
        rfig.savefig('chomp_reward.png')
        # Plot Qtable Memory Usage Stats
        memfig, memaxs = plt.subplots(nrows=3, ncols=1)
        for ax in memaxs:
            ax.grid()
        memaxs[0].plot(history[0][:100], history[2][:100])
        memaxs[0].set(ylabel='Size (KB)', title='Chomp 4x4 QTable Size')
        memaxs[1].plot(history[0][:1000], history[2][:1000], color='g')
        memaxs[1].set(ylabel='Size (KB)')
        memaxs[2].plot(history[0][:n], history[2][:n], color='r')
        memaxs[2].set(xlabel='Episode', ylabel='Size (KB)')
        # FIX: the memory figure was built but never written to disk,
        # unlike the tictactoe/connectfour counterparts.
        memfig.savefig('chomp_memory.png')
        plt.show()
        agent.save_values(path='data/chomp_qtable.json')
        agent.demo()
    elif mode == 'hyper':
        # Hyper parameter optimization: exhaustive grid search, keeping
        # the (epsilon, learning_rate) pair with the best final reward.
        max_e = 0.0
        max_lr = 0.0
        max_reward = 0.0
        epsilons = [1e-1, 2e-1, 9e-2, 1e-2, 9e-3]
        learning_rates = [1e-1, 2e-1, 3e-1, 25e-2, 9e-2]
        for epsilon in epsilons:
            for learning_rate in learning_rates:
                agent = Agent(game, qtable={}, player='X', epsilon=epsilon,
                              learning_rate=learning_rate)
                n = 10000
                history = agent.train(n, history=[])
                # Final cumulative reward for this parameter pair.
                total = history[1][-1]
                print(total)
                if total > max_reward:
                    max_reward = total
                    max_e = epsilon
                    max_lr = learning_rate
        print('Max e: {}'.format(max_e))
        print('Max lr: {}'.format(max_lr))
        print('Max reward: {}'.format(max_reward))
    elif mode == 'demo':
        # FIX: use a context manager so the file handle is closed;
        # json.load(open(...)) leaked it.
        with open('data/chomp_qtable.json') as f:
            qtable = json.load(f)
        agent = Agent(game, qtable=qtable)
        agent.demo()
    else:
        print('Mode {} is invalid.'.format(mode))
def play_tictactoe(mode):
    """Start TicTacToe game with RL Agent.

    Args:
        mode: 'train' to train an agent, plot/save stats, and demo it;
            'demo' to load a saved Q-table and play.
    """
    print('==TIC TAC TOE==')
    game = TicTacToe()
    if mode == 'train':
        agent = Agent(game)
        history = agent.train(10000)
        print('After 10000 Episodes')
        # Plot Reward Stats: same series over three episode windows
        # (first 100, first 1000, all 10000) to show convergence at each scale.
        rfig, raxs = plt.subplots(nrows=3, ncols=1)
        for ax in raxs:
            ax.grid()
        raxs[0].plot(history[0][:100], history[1][:100])
        raxs[0].set(ylabel='Cumulative Reward',
                    title='Tic Tac Toe Cumulative Reward Episodes')
        raxs[1].plot(history[0][:1000], history[1][:1000], color='g')
        raxs[1].set(ylabel='Cumulative Reward')
        raxs[2].plot(history[0][:10000], history[1][:10000], color='r')
        raxs[2].set(xlabel='Episode', ylabel='Cumulative Reward')
        rfig.savefig('tictactoe_reward.png')
        # Plot Qtable Memory Usage Stats
        memfig, memaxs = plt.subplots(nrows=3, ncols=1)
        for ax in memaxs:
            ax.grid()
        memaxs[0].plot(history[0][:100], history[2][:100])
        memaxs[0].set(ylabel='Size (KB)',
                      title='Tic Tac Toe QTable Size Episodes')
        memaxs[1].plot(history[0][:1000], history[2][:1000], color='g')
        memaxs[1].set(ylabel='Size (KB)')
        memaxs[2].plot(history[0][:10000], history[2][:10000], color='r')
        memaxs[2].set(xlabel='Episode', ylabel='Size (KB)')
        memfig.savefig('tictactoe_memory.png')
        plt.show()
        agent.save_values(path='data/tictactoe_qtable.json')
        agent.stats()
        agent.demo()
    elif mode == 'demo':
        # FIX: use a context manager so the file handle is closed;
        # json.load(open(...)) leaked it.
        with open('data/tictactoe_qtable.json') as f:
            qtable = json.load(f)
        agent = Agent(game, qtable=qtable)
        agent.demo()
    else:
        print('Mode {} is invalid.'.format(mode))
def play_connectfour(mode):
    """Start Connect Four game and training.

    Args:
        mode: 'train' to train an agent, plot/save stats, and demo it;
            'demo' to load a saved Q-table and play.
    """
    print('==CONNECT FOUR==')
    game = ConnectFour()
    if mode == 'train':
        agent = Agent(game)
        history = agent.train(10000)
        print('After 10000 Episodes')
        # Plot Reward Stats: same series over three episode windows
        # (first 100, first 1000, all 10000) to show convergence at each scale.
        rfig, raxs = plt.subplots(nrows=3, ncols=1)
        for ax in raxs:
            ax.grid()
        raxs[0].plot(history[0][:100], history[1][:100])
        raxs[0].set(ylabel='Cumulative Reward',
                    title='Connect Four Cumulative Reward (3 Column State)')
        raxs[1].plot(history[0][:1000], history[1][:1000], color='g')
        raxs[1].set(ylabel='Cumulative Reward')
        raxs[2].plot(history[0][:10000], history[1][:10000], color='r')
        raxs[2].set(xlabel='Episode', ylabel='Cumulative Reward')
        rfig.savefig('connectfour_reward.png')
        # Plot Qtable Memory Usage Stats
        memfig, memaxs = plt.subplots(nrows=3, ncols=1)
        for ax in memaxs:
            ax.grid()
        memaxs[0].plot(history[0][:100], history[2][:100])
        memaxs[0].set(ylabel='Size (KB)',
                      title='Connect Four QTable Size (3 Column State)')
        memaxs[1].plot(history[0][:1000], history[2][:1000], color='g')
        memaxs[1].set(ylabel='Size (KB)')
        memaxs[2].plot(history[0][:10000], history[2][:10000], color='r')
        memaxs[2].set(xlabel='Episode', ylabel='Size (KB)')
        memfig.savefig('connectfour_memory.png')
        plt.show()
        agent.save_values(path='data/connectfour_qtable.json')
        agent.demo()
    elif mode == 'demo':
        # FIX: use a context manager so the file handle is closed;
        # json.load(open(...)) leaked it.
        with open('data/connectfour_qtable.json') as f:
            qtable = json.load(f)
        agent = Agent(game, qtable=qtable)
        agent.demo()
    else:
        print('Mode {} is invalid.'.format(mode))