# Compile the agent with an Adam optimizer and report the 'mae' metric.
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! Visualization is switched off here
# (visualize=False) because rendering slows down training quite a lot. You can
# always safely abort the training prematurely using Ctrl + C.
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

# The built-in evaluation run (5 episodes) is currently disabled; the manual
# rollout below queries the trained network directly instead.
#dqn.test(env, nb_episodes=5, visualize=True)

# Shape used when feeding one observation to the network: a leading axis of
# length 1 followed by the environment's observation shape.
batch_shape = (1,) + env.observation_space.shape

# Manual rollout, part 1: evaluate the network on `env.isd`, take the index of
# the largest output as the action, then repeatedly step the environment and
# re-query the network, printing every chosen action.
kvar = K.variable(env.isd.reshape(batch_shape))
y = K.eval(dqn.model(kvar))
a0 = np.argmax(y)
print(a0)
for i in range(1, 10):
    # Only the first element of the step result (the next observation) is used.
    s1 = env.step(a0)[0]
    # NOTE(review): this is the only place that uses tf.transpose rather than
    # reshape(batch_shape) to prepare the network input -- confirm the
    # resulting shape matches what dqn.model expects.
    kvar = tf.transpose(K.variable(s1))
    y = K.eval(dqn.model(kvar))
    a0 = np.argmax(y)
    print(a0)

# Manual rollout, part 2: same stepping scheme, but the observation is
# reshaped to `batch_shape` before being passed to the network.
for i in range(1, 10):
    print(a0)
    s1 = env.step(a0)
    kvar = K.variable(s1[0].reshape(batch_shape))
    # NOTE(review): unlike part 1, the model output is neither passed through
    # K.eval nor used to update `a0` within the visible code, so every
    # iteration steps with the same action -- confirm whether the
    # K.eval/argmax update is missing here.
    y = dqn.model(kvar)