コード例 #1
0
    #     dqn.load_weights(checkpoint_weights_filename)
    # elif os.path.isfile(weights_filename):
    #     print("Loading previous weights...")
    #     dqn.load_weights(weights_filename)
    dqn.fit(env, callbacks=callbacks, nb_steps=20000000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'wts/dqn_Breakout-v0_weights_12000000_phyran.h5f'.format(
        args.env_name)
    if args.weights:
        weights_filename = args.weights
    print(env.unwrapped.get_action_meanings())
    np.random.seed(None)
    env.seed(None)
    dqn.load_weights(weights_filename)
    dqn.training = False
    dqn.test_policy = EpsilonPhysicsPolicy(
        eps_phy=0.01, eps_ran=0.00
    )  # set a small epsilon for test policy to avoid getting stuck
    env = gym.wrappers.Monitor(env,
                               "records/",
                               video_callable=lambda episode_id: True,
                               force=True)
    dqn.test(env, nb_episodes=100, visualize=False)
    env.close()
コード例 #2
0
        dqn.load_weights(weights_filename)
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=20000000,
            log_interval=10000,
            nb_max_start_steps=20)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=1, nb_max_start_steps=20, visualize=False)
elif args.mode == 'test':
    weights_filename = 'wts/phy_dqn_BreakoutDeterministic-v4_weights.h5f'.format(
        args.env_name)
    if args.weights:
        weights_filename = args.weights
    np.random.seed(None)
    env.seed(None)
    print(env.unwrapped.get_action_meanings())
    dqn.load_weights(weights_filename)
    dqn.training = False
    dqn.test_policy = EpsGreedyQPolicy(
        0.01)  # set a small epsilon for test policy to avoid getting stuck
    env = gym.wrappers.Monitor(env,
                               "records/",
                               video_callable=lambda episode_id: True,
                               force=True)
    dqn.test(env, nb_episodes=100, nb_max_start_steps=20, visualize=False)
    env.close()