def basic_usage_example():
    """Train, save, reload, evaluate and render a DQN agent on LunarLander-v2.

    Basic usage walkthrough: training, saving and loading a model.

    NOTE: ``gym``, ``DQN`` and ``evaluate_policy`` are expected to be imported
    at module level; they are not imported inside this function.
    NOTE(review): the reset/step calls below use the 4-tuple step API
    (``obs, reward, done, info``), as the original code did -- confirm this
    matches the installed gym version.
    """
    # Create environment.
    env = gym.make("LunarLander-v2")

    # Instantiate the agent.
    model = DQN("MlpPolicy", env, verbose=1)
    # Train the agent.
    model.learn(total_timesteps=int(2e5))
    # Save the agent.
    model.save("dqn_lunar")
    del model  # Delete trained model to demonstrate loading.

    # Load the trained agent.
    # NOTE: if you have loading issues, you can pass 'print_system_info=True'
    # to DQN.load() to compare the system on which the model was trained
    # with the current one.
    model = DQN.load("dqn_lunar", env=env)

    # Evaluate the agent.
    # NOTE: If you use wrappers with your environment that modify rewards,
    # this will be reflected here. To evaluate with original rewards,
    # wrap the environment in a "Monitor" wrapper before other wrappers.
    mean_reward, std_reward = evaluate_policy(
        model, model.get_env(), n_eval_episodes=10
    )

    # Enjoy trained agent.
    try:
        obs = env.reset()
        for _ in range(1000):
            action, _states = model.predict(obs, deterministic=True)
            obs, rewards, dones, info = env.step(action)
            env.render()
            # A plain gym env does not restart by itself: begin a new episode
            # once the current one has ended instead of stepping a finished one.
            if dones:
                obs = env.reset()
    finally:
        # Release the environment (and its render window) even if the
        # rollout is interrupted.
        env.close()
from stable_baselines3 import DQN, PPO, A2C
from stable_baselines3.common.evaluation import evaluate_policy

try:
    # Preferred import location for make_vec_env.
    from stable_baselines3.common.env_util import make_vec_env
except ImportError:
    # Fallback for installations that only provide the legacy module name.
    from stable_baselines3.common.cmd_util import make_vec_env

# Instantiate the env.
# NOTE(review): ABCEnv is not defined or imported in this snippet; it must be
# provided by the surrounding file.
base_env = ABCEnv()
# Wrap it. The raw env is kept under its own name so the factory lambda does
# not close over a variable that is rebound on the same line.
env = make_vec_env(lambda: base_env, n_envs=1)

# Train the agent.
# Things you might want to play around with: learning_rate, total timesteps,
# etc. Always choose a sample-efficient algorithm.
total_timesteps = 200
model = DQN('MlpPolicy', env, verbose=1, tensorboard_log="./CSC2547_tensorboard/")
model.learn(total_timesteps)

# Save the agent, then reload it from disk.
model_name = "DQN_timesteps_" + str(total_timesteps)
model.save(model_name)
# load() returns a new model rather than updating the existing one in place,
# so the result must be assigned for the reloaded weights to be evaluated.
model = DQN.load(model_name, env=env)

# Evaluate the reloaded agent.
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=2)
print("mean_reward is: ", mean_reward)
print("std_reward is: ", std_reward)
## Fishing with DQN example
import os

import gym
import gym_fishing
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

# Make sure the output directory exists before anything is written into it
# (the plot and the saved model both target "results/").
os.makedirs("results", exist_ok=True)

# Create environment
env = gym.make('fishing-v0')

# Instantiate the agent
model = DQN('MlpPolicy', env, verbose=0)
# Train the agent
model.learn(total_timesteps=int(1e5))

## simulate and plot results
df = env.simulate(model, reps=10)
env.plot(df, "results/dqn.png")
# Kept under a separate name so the simulation results in `df` are not
# overwritten. The value is not used further in this script.
policy_df = env.estimate_policyfn(model, reps=10)

# Evaluate the agent
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=50)
print("mean reward:", mean_reward, "std:", std_reward)

# Save the agent
model.save("results/dqn_fish_v0")