Example #1
def make_sarsa_rl_agent(processor: Processor_56x5, nbr_layers=2):
    # Build the Q-network from the project-specific processor and use a
    # greedy policy when the agent is evaluated.
    model = processor.create_model(nbr_layers=nbr_layers)
    test_policy = GreedyQPolicy()

    sarsa_agent = SarsaAgent(model=model,
                             nb_actions=NBR_TICHU_ACTIONS,
                             nb_steps_warmup=10,
                             gamma=0.99,
                             test_policy=test_policy,
                             processor=processor)
    sarsa_agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return sarsa_agent
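Example #1 is a fragment from a Tichu project: Processor_56x5 and NBR_TICHU_ACTIONS come from that codebase and are not shown. The following is a rough, self-contained sketch of the same construction pattern on a generic Gym environment (CartPole-v1 is only a placeholder), using the keras-rl and Keras imports the snippet relies on:

# A self-contained sketch of the same SARSA-agent setup on a placeholder env.
import gym
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import Adam
from rl.agents import SarsaAgent
from rl.policy import GreedyQPolicy

env = gym.make('CartPole-v1')          # placeholder environment
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))

agent = SarsaAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   gamma=0.99,
                   test_policy=GreedyQPolicy())
agent.compile(Adam(lr=1e-3), metrics=['mae'])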
Example #2
def test_sarsa():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    policy = EpsGreedyQPolicy(eps=.1)
    sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=50, policy=policy)
    sarsa.compile(Adam(lr=1e-3))

    sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0)
    policy.eps = 0.  # act greedily during evaluation
    h = sarsa.test(env, nb_episodes=20, visualize=False)
    # The optimal return in TwoRoundDeterministicRewardEnv is 3.
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
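The test above is a fragment; to run on its own it needs roughly the imports below. The TwoRoundDeterministicRewardEnv path shown is the one used by older gym releases and is an assumption here:

import random
import numpy as np
from numpy.testing import assert_allclose
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import Adam
from rl.agents import SarsaAgent
from rl.policy import EpsGreedyQPolicy
# Assumed import path; older gym versions shipped this debugging env:
from gym.envs.debugging.two_round_deterministic_reward import TwoRoundDeterministicRewardEnv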
Example #3
print('Observations: ', (1, ) + env.observation_space.shape)

# create model
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# configure agent (a SARSA agent, despite what the original variable name suggested)
policy = BoltzmannQPolicy()
sarsa = SarsaAgent(model=model,
                   nb_actions=nb_actions,
                   nb_steps_warmup=10,
                   policy=policy)
sarsa.compile(Adam(lr=1e-5), metrics=['mae'])

# run agent
history = sarsa.fit(env,
                    nb_steps=10000,
                    visualize=False,
                    verbose=1,
                    log_interval=100)

# plot the per-episode reward collected during training
plt.plot(history.history['episode_reward'])
plt.show()
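The snippet above assumes that env, nb_actions, and matplotlib's pyplot are already in scope. A preamble roughly like the following (CartPole-v0 is only a placeholder choice) would need to precede it:

import gym
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents import SarsaAgent
from rl.policy import BoltzmannQPolicy

env = gym.make('CartPole-v0')   # placeholder environment
nb_actions = env.action_space.n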
Example #4
    # Finally, evaluate our algorithm for 5 episodes.
    # (This closes the DQN branch; the else-branch below builds a SARSA agent instead.)
    dqn.test(env, nb_episodes=5, visualize=False)

else:
    # SARSA: unlike the DQN agent above, SarsaAgent is on-policy and
    # does not take a replay memory.
    policy = BoltzmannQPolicy()

    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(state_size // 2))   # Dense expects integer unit counts
    model.add(Activation('relu'))
    model.add(Dense(state_size // 4))
    model.add(Activation('relu'))
    model.add(Dense(state_size // 8))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    model.summary()

    sarsa = SarsaAgent(model=model, nb_actions=nb_actions, nb_steps_warmup=10, policy=policy)
    sarsa.compile(Adam(lr=1e-3), metrics=['mae'])

    sarsa.fit(env, nb_steps=500000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    sarsa.save_weights('weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    sarsa.test(env, nb_episodes=5, visualize=False)
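As a sketch of how the saved weights could be reused later: assuming the same model and SarsaAgent construction from the else-branch above has been run again (and the agent compiled), keras-rl's load_weights restores them before evaluation:

# Rebuild and compile the agent exactly as above, then restore the trained weights.
sarsa.load_weights('weights.h5f')
sarsa.test(env, nb_episodes=5, visualize=True)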