Example #1
def play_it():
    #ENV_NAME = 'CartPole-v0'
    #ENV_NAME = 'MountainCar-v0'
    ENV_NAME = 'Single_virtual-v0'
    # Get the environment and extract the number of actions.
    env = make(ENV_NAME)
    env1 = make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    model = build_model(nb_actions,env.observation_space)
    # model = build_model1(nb_actions, env.observation_space)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy,)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! Visualization is disabled here because
    # rendering every step slows down training quite a lot. You can always safely abort
    # the training prematurely using Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    weights_filename = 'dqn_{}_weights.h5f'.format(
        ENV_NAME + datetime.now().strftime("%Y%m%d-%H%M%S"))
    dqn.save_weights(os.path.join('models_weights_logs', weights_filename), overwrite=True)
    # dqn.load_weights(os.path.join('models_weights_logs','dqn_{}_weights.h5f'.format(ENV_NAME)))
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env1, nb_episodes=5, visualize=True)
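# The helpers `make` and `build_model` used above are not shown in this example; `make` is
# presumably `gym.make` (or a project-specific factory that registers the custom
# 'Single_virtual-v0' environment) and `build_model` returns a Keras network with one output
# per action. A minimal sketch of such a `build_model`, assuming a flat observation space
# and the Keras Sequential API (layer sizes are illustrative, not the original ones):
from keras.models import Sequential
from keras.layers import Dense, Flatten

def build_model(nb_actions, observation_space):
    # Flatten the (window_length, *obs_shape) input and map it to one Q-value per action.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))
    return model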
Example #2
def optimize_agent(trial, args):
    "Optimize the model."

    model_name = args.study_name + "_" + str(trial.number)
    env_kwargs = dict()
    callback_checkpoint_kwargs = dict()
    save_dir = args.save_dir
    log_interval = args.log_interval
    num_cpus = args.num_cpus
    eval_episodes = args.eval_episodes
    n_steps = args.n_steps
    layer_normalization = args.layer_normalization
    layers = args.layers
    env_kwargs["board_size"] = 4
    env_kwargs["binary"] = not args.no_binary
    env_kwargs["extractor"] = args.extractor
    env_kwargs["seed"] = args.seed
    env_kwargs["penalty"] = args.penalty
    callback_checkpoint_kwargs["save_freq"] = args.save_freq
    callback_checkpoint_kwargs["save_path"] = args.save_dir
    callback_checkpoint_kwargs["name_prefix"] = model_name

    if args.agent == "ppo2":
        model_kwargs = trial_hiperparameter_ppo2(trial)
        model_kwargs["agent"] = "ppo2"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model = PPO2Agent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    elif args.agent == "dqn":
        # model_kwargs = trial_hiperparameter_dqn(trial)
        model_kwargs = {}
        model_kwargs["learning_rate"] = 0.0001
        model_kwargs["batch_size"] = 10000
        model_kwargs["learning_starts"] = 10000
        model_kwargs["target_network_update_freq"] = 1000
        model_kwargs["train_freq"] = 4
        model_kwargs["agent"] = "dqn"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model_kwargs["double_q"] = True
        model_kwargs["prioritized_replay"] = True
        model_kwargs["param_noise"] = True
        print(model_kwargs)
        model = DQNAgent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            layers,
            args.load_path,
            args.num_timesteps_log,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    elif args.agent == "acer":
        model_kwargs = trial_hiperparameter_acer(trial)
        model_kwargs["agent"] = "acer"
        model_kwargs["tensorboard_log"] = args.tensorboard_log
        model_kwargs["replay_start"] = 2000
        model = ACERAgent(
            model_name,
            save_dir,
            log_interval,
            num_cpus,
            eval_episodes,
            n_steps,
            layer_normalization,
            model_kwargs,
            env_kwargs,
            callback_checkpoint_kwargs,
        )
    else:
        raise ValueError("Choose a valid agent model: 'ppo2', 'dqn' or 'acer'")

    model.train()
    total_score = model.test()

    return total_score
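# The `trial` argument and the `trial_hiperparameter_*` helpers indicate that this objective
# is meant to be driven by Optuna. A minimal, hypothetical driver (the `args` object and its
# `n_trials` attribute are assumptions, not part of the original script):
import optuna

def run_study(args):
    # Maximize the score returned by model.test() across trials.
    study = optuna.create_study(study_name=args.study_name, direction="maximize")
    study.optimize(lambda trial: optimize_agent(trial, args), n_trials=args.n_trials)
    print("Best score:", study.best_value)
    print("Best hyperparameters:", study.best_params)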
Example #3
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!
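# The `memory`, `policy` and `processor` objects passed to the agent below are created
# earlier in the original script and are not shown here. A rough sketch of what they could
# look like for an Atari-style setup with keras-rl (WINDOW_LENGTH, the frame size and the
# AtariLikeProcessor class are assumptions, not the original code):
import numpy as np
from PIL import Image
from rl.core import Processor
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

WINDOW_LENGTH = 4
INPUT_SHAPE = (84, 84)

class AtariLikeProcessor(Processor):
    def process_observation(self, observation):
        # Resize the raw RGB frame and convert it to 8-bit grayscale.
        img = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        return np.array(img).astype('uint8')

    def process_state_batch(self, batch):
        # Stored frames are uint8; rescale to [0, 1] only when they are fed to the network.
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        # Clip rewards to [-1, 1], as in the original DQN paper.
        return np.clip(reward, -1., 1.)

processor = AtariLikeProcessor()
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
# Anneal epsilon from 1.0 down to 0.1 over the first million steps, then keep it fixed.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                              value_test=.05, nb_steps=1000000)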

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.),
               target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
Example #4
# build model.
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(432, activation='relu'))
model.add(Dense(216, activation='relu'))
model.add(Dense(144, activation='linear'))
model.add(Dense(nb_actions))
print(model.summary())

memory = SequentialMemory(limit=500000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=100,
               target_model_update=1e-2,
               policy=policy)
dqn.compile(Adam(lr=1e-2), metrics=['mae'])
#dqn.load_weights('E:\\git\\TensorFlow\\dqn_Snake_weights_2_weights.h5f')
dqn.fit(env,
        nb_steps=500000,
        visualize=visualize_training,
        verbose=2,
        callbacks=[tbCallBack])
# After training is done, save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format("Snake"), overwrite=True)

# Evaluate the trained agent for 500 episodes.
dqn.test(env, nb_episodes=500, visualize=True)
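# `visualize_training` and `tbCallBack`, used in dqn.fit() above, are defined earlier in the
# original script. A plausible setup, assuming the standard Keras TensorBoard callback
# (the log directory is an arbitrary choice):
from keras.callbacks import TensorBoard

visualize_training = False  # rendering every step slows training down considerably
# Training metrics can then be inspected with `tensorboard --logdir ./logs`.
tbCallBack = TensorBoard(log_dir='./logs')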
Example #5
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
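# This last snippet omits its setup: `ENV_NAME`, `env` and `nb_actions` have to be defined
# before the model is built. The usual boilerplate, assuming a standard Gym environment with
# a discrete action space such as CartPole:
import gym
import numpy as np

ENV_NAME = 'CartPole-v0'  # assumed; any discrete-action Gym environment works
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n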