# dqn.load_weights(checkpoint_weights_filename) # elif os.path.isfile(weights_filename): # print("Loading previous weights...") # dqn.load_weights(weights_filename) dqn.fit(env, callbacks=callbacks, nb_steps=20000000, log_interval=10000) # After training is done, we save the final weights one more time. dqn.save_weights(weights_filename, overwrite=True) # Finally, evaluate our algorithm for 10 episodes. dqn.test(env, nb_episodes=10, visualize=False) elif args.mode == 'test': weights_filename = 'wts/dqn_Breakout-v0_weights_12000000_phyran.h5f'.format( args.env_name) if args.weights: weights_filename = args.weights print(env.unwrapped.get_action_meanings()) np.random.seed(None) env.seed(None) dqn.load_weights(weights_filename) dqn.training = False dqn.test_policy = EpsilonPhysicsPolicy( eps_phy=0.01, eps_ran=0.00 ) # set a small epsilon for test policy to avoid getting stuck env = gym.wrappers.Monitor(env, "records/", video_callable=lambda episode_id: True, force=True) dqn.test(env, nb_episodes=100, visualize=False) env.close()
dqn.load_weights(weights_filename) dqn.fit(env, callbacks=callbacks, nb_steps=20000000, log_interval=10000, nb_max_start_steps=20) # After training is done, we save the final weights one more time. dqn.save_weights(weights_filename, overwrite=True) # Finally, evaluate our algorithm for 10 episodes. dqn.test(env, nb_episodes=1, nb_max_start_steps=20, visualize=False) elif args.mode == 'test': weights_filename = 'wts/phy_dqn_BreakoutDeterministic-v4_weights.h5f'.format( args.env_name) if args.weights: weights_filename = args.weights np.random.seed(None) env.seed(None) print(env.unwrapped.get_action_meanings()) dqn.load_weights(weights_filename) dqn.training = False dqn.test_policy = EpsGreedyQPolicy( 0.01) # set a small epsilon for test policy to avoid getting stuck env = gym.wrappers.Monitor(env, "records/", video_callable=lambda episode_id: True, force=True) dqn.test(env, nb_episodes=100, nb_max_start_steps=20, visualize=False) env.close()