# Tail of a DQNAgent(...) constructor call — the opening of the call is outside
# this chunk, so these keyword arguments are kept verbatim.
# Configures a dueling DQN ('avg' aggregation) with soft target-network updates
# (target_model_update=1e-3) and a 100-step warmup before learning starts.
          memory=memory, nb_steps_warmup=100, enable_dueling_network=True,
          dueling_type='avg', target_model_update=1e-3, policy=policy)

# Compile the agent with Adam (learning rate 1e-4), tracking mean absolute error.
# NOTE(review): `lr=` is the legacy Keras spelling; newer Keras uses
# `learning_rate=` — confirm against the Keras version this repo pins.
dqn.compile(Adam(lr=1e-4), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
# NOTE(review): `learning` is a custom method (not stock keras-rl `fit`);
# `imitation_leaning_time` looks like a typo for "imitation_learning_time", but
# it must match the method's own signature — verify there before renaming.
# With imitation_leaning_time=0 and reinforcement_learning_time=1e10, the run is
# presumably pure reinforcement learning — confirm against the method's logic.
# nb_steps=5e6 is a float; the step-count comparison presumably tolerates it.
history = dqn.learning(env, Given_policy, policy_list, nb_steps=5e6,
                       visualize=False, log_interval=1000, verbose=2,
                       nb_max_episode_steps=1000, imitation_leaning_time=0,
                       reinforcement_learning_time=1e10)
# Persist the training history to a MATLAB .mat file under the run directory
# named "<ENV_NAME>-<nowtime>".
sio.savemat(ENV_NAME + '-' + nowtime + '/fit.mat', history.history)

# After training is done, we save the final weights.
dqn.save_weights(ENV_NAME + '-' + nowtime + '/fit-weights.h5f', overwrite=True)

# Finally, evaluate our algorithm for 10 episodes (with rendering enabled),
# capping each episode at 5000 steps.
history = dqn.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=5000)
# Persist the evaluation history alongside the training artifacts.
sio.savemat(ENV_NAME + '-' + nowtime + '/test.mat', history.history)