Example #1
0
# Compile the agent with an Adam optimizer (lr=1e-3) and report the 'mae' metric.
dqn.compile(
    Adam(lr=1e-3),
    metrics=['mae'],
)

# Run training for 100000 steps. Rendering is turned off here (visualize=False);
# visualizing slows training down quite a lot. Training can be aborted early
# with Ctrl + C.
dqn.fit(
    env,
    nb_steps=100000,
    visualize=False,
    verbose=2,
)

# Once training has finished, write the final weights to disk (overwrite=True).
dqn.save_weights(
    'dqn_weights.h5f',
    overwrite=True,
)

# Evaluation over 5 episodes is currently disabled; uncomment the line below to run it.
#dqn.test(env, nb_episodes=5, visualize=True)

# Pick the first action greedily: reshape `env.isd` into a batch of one observation,
# run it through the agent's network, and take the index of the largest output.
batch_shape = (1, ) + env.observation_space.shape
start_input = K.variable(env.isd.reshape(batch_shape))
a0 = np.argmax(K.eval(dqn.model(start_input)))
print(a0)

# Roll the greedy policy forward for nine more steps, printing each chosen action.
for i in range(1, 10):
    # Element 0 of env.step's return value is used as the next observation.
    s1 = env.step(a0)[0]
    # Give the observation a leading batch axis of size one, using the same reshape as
    # the other places in this script that feed the network. A plain transpose leaves a
    # rank-1 observation unchanged (no batch axis); for a column-vector observation the
    # reshape yields the same values the transpose did.
    kvar = K.variable(np.asarray(s1).reshape((1, ) + env.observation_space.shape))
    y = K.eval(dqn.model(kvar))
    # Greedy choice: index of the largest network output.
    a0 = np.argmax(y)
    print(a0)

# Second rollout variant: print the current action, step the environment with it, and
# run the network on the resulting observation (reshaped to a batch of one).
# NOTE(review): in the lines visible here `y` is left as the direct result of calling the
# model (no K.eval, unlike the rollout loop above) and `a0` is never reassigned, so
# every iteration would step with the same action -- confirm whether the loop body
# continues past this point.
for i in range(1, 10):
    print(a0)
    s1 = env.step(a0)  # full return value of step(); element 0 is reshaped below
    kvar = K.variable(s1[0].reshape((1, ) + env.observation_space.shape))
    y = dqn.model(kvar)