Example #1
# Imports and the head of this constructor were missing from the excerpt; they
# are reconstructed here assuming a keras-rl style DQNAgent (the custom
# `learning` method below suggests a modified fork). `model`, `nb_actions`,
# `memory`, `policy`, `ENV_NAME`, and `nowtime` are assumed to be defined
# earlier in the script.
import scipy.io as sio
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent

dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=100,
               enable_dueling_network=True,
               dueling_type='avg',
               target_model_update=1e-3,
               policy=policy)
dqn.compile(Adam(lr=1e-4), metrics=['mae'])
# Okay, now it's time to learn something! Visualizing the training slows it
# down quite a lot, so it is disabled here (visualize=False). You can always
# safely abort the training prematurely with Ctrl + C.
# Note: `learning` is a custom method of this agent (it is not part of stock
# keras-rl); `Given_policy` and `policy_list` are assumed to be defined earlier.

history = dqn.learning(env,
                       Given_policy,
                       policy_list,
                       nb_steps=5e6,
                       visualize=False,
                       log_interval=1000,
                       verbose=2,
                       nb_max_episode_steps=1000,
                       imitation_leaning_time=0,
                       reinforcement_learning_time=1e10)
sio.savemat(ENV_NAME + '-' + nowtime + '/fit.mat', history.history)
# After training is done, we save the final weights.

dqn.save_weights(ENV_NAME + '-' + nowtime + '/fit-weights.h5f', overwrite=True)
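
# Hypothetical usage sketch (not in the original script): to evaluate later
# without re-training, rebuild the same agent and restore the saved weights.
# keras-rl agents provide load_weights() as the counterpart to save_weights().
if False:  # flip to True to restore the weights saved above instead of re-training
    dqn.load_weights(ENV_NAME + '-' + nowtime + '/fit-weights.h5f')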

# Finally, evaluate our algorithm for 10 episodes.
history = dqn.test(env,
                   nb_episodes=10,
                   visualize=True,
                   nb_max_episode_steps=5000)
sio.savemat(ENV_NAME + '-' + nowtime + '/test.mat', history.history)
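
# Hypothetical follow-up (not in the original example): the saved .mat files
# can be inspected afterwards with scipy.io.loadmat, e.g. to plot per-episode
# results. The exact keys depend on the history recorded by this agent.
results = sio.loadmat(ENV_NAME + '-' + nowtime + '/test.mat')
print(sorted(k for k in results.keys() if not k.startswith('__')))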