Example no. 1
import gym
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy


def basic_usage_example():
    # Basic usage: training, saving, loading.

    # Create environment.
    env = gym.make("LunarLander-v2")

    # Instantiate the agent.
    model = DQN("MlpPolicy", env, verbose=1)
    # Train the agent.
    model.learn(total_timesteps=int(2e5))
    # Save the agent.
    model.save("dqn_lunar")
    del model  # Delete trained model to demonstrate loading.

    # Load the trained agent.
    # NOTE: if you have loading issues, you can pass 'print_system_info=True'
    # to compare the system on which the model was trained vs the current one.
    #model = DQN.load("dqn_lunar", env=env, print_system_info=True)
    model = DQN.load("dqn_lunar", env=env)

    # Evaluate the agent.
    # NOTE: If you use wrappers with your environment that modify rewards,
    # this will be reflected here. To evaluate with original rewards,
    # wrap the environment in a "Monitor" wrapper before other wrappers
    # (see the sketch after this example).
    mean_reward, std_reward = evaluate_policy(model,
                                              model.get_env(),
                                              n_eval_episodes=10)

    # Enjoy trained agent.
    obs = env.reset()
    for _ in range(1000):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
        env.render()
        if done:
            obs = env.reset()
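
The "Monitor" wrapper mentioned in the NOTE above is the standard stable_baselines3 one; a minimal sketch of applying it before any reward-modifying wrappers (the reward-shaping wrapper here is hypothetical, shown only to illustrate the ordering):

import gym
from stable_baselines3.common.monitor import Monitor

env = Monitor(gym.make("LunarLander-v2"))  # records original episode rewards
# env = SomeRewardShapingWrapper(env)      # reward-modifying wrappers go on top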
Example no. 2
from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

# Instantiate the env (ABCEnv is a custom Gym environment defined elsewhere)
env = ABCEnv()
# Wrap it in a vectorized environment
env = make_vec_env(lambda: env, n_envs=1)
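# Note: make_vec_env also accepts an env id string directly, e.g.
# make_vec_env("CartPole-v1", n_envs=4), if you do not already have an
# instantiated environment.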

# Train the agent.
# Hyperparameters worth tuning: learning_rate, total_timesteps, etc.
# Prefer a sample-efficient algorithm when environment interaction is expensive.
total_timesteps = 200
model = DQN('MlpPolicy',
            env,
            verbose=1,
            tensorboard_log="./CSC2547_tensorboard/")
model.learn(total_timesteps)

model_name = "DQN_timesteps_" + str(total_timesteps)
model.save(model_name)

# load() is a classmethod that returns a new model instance, so re-assign it
model = DQN.load(model_name, env=env)
mean_reward, std_reward = evaluate_policy(model,
                                          model.get_env(),
                                          n_eval_episodes=2)
print("mean_reward is: ", mean_reward)
print("std_reward is: ", std_reward)
Example no. 3
## Fishing with DQN example
import gym
import gym_fishing
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

# Create environment
env = gym.make('fishing-v0')
# Instantiate the agent
model = DQN('MlpPolicy', env, verbose=0)
# Train the agent
model.learn(total_timesteps=int(1e5))

## simulate and plot results
df = env.simulate(model, reps=10)
env.plot(df, "results/dqn.png")

# Estimate the learned policy function (gym_fishing-specific helper)
df = env.estimate_policyfn(model, reps=10)

# Evaluate the agent
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=50)
print("mean reward:", mean_reward, "std:", std_reward)

# Save the agent
model.save("results/dqn_fish_v0")