Example #1
import os

import gym
import matplotlib.pyplot as plt

from stable_baselines3 import DQN
from stable_baselines3.common import results_plotter
from stable_baselines3.common.monitor import Monitor

import offworld_gym  # registers the OffWorld Gym environments with gym
from offworld_gym.envs.common.channels import Channels

from utils import SaveOnBestTrainingRewardCallback  # custom callback, see the sketch after this example

log_dir = "tmp/"
os.makedirs(log_dir, exist_ok=True)

env = gym.make("OffWorldDockerMonolithDiscreteSim-v0",
               channel_type=Channels.RGB_ONLY)
time_steps = 200000
name = "Offworld_DQN4"

env = Monitor(env, log_dir)
callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)
model = DQN("MlpPolicy",
            env,
            gamma=0.95,
            learning_rate=1e-3,
            verbose=0,
            buffer_size=1000,
            batch_size=16,
            exploration_fraction=0.9,
            exploration_final_eps=0.1,
            exploration_initial_eps=1.0,
            train_freq=1)
print(type(callback))
#, exploration_fraction=0.1, exploration_final_eps=0.02, exploration_initial_eps=1.0, train_freq=1
model.learn(total_timesteps=int(time_steps), callback=callback)

results_plotter.plot_results([log_dir], time_steps,
                             results_plotter.X_TIMESTEPS, name)
plt.savefig(name + '.png')
model.save(name)

model = DQN.load(name)
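
Examples #1 and #5 import a custom SaveOnBestTrainingRewardCallback that is not shown here. The sketch below, adapted from the Stable-Baselines3 callback documentation, is one plausible implementation; the class name and the utils module it would live in come from these snippets, while the body itself is an assumption.

import os

import numpy as np

from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy


class SaveOnBestTrainingRewardCallback(BaseCallback):
    """Save the model whenever the mean training reward reaches a new best."""

    def __init__(self, check_freq, log_dir, save_path=None, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        # default to <log_dir>/best_model when no explicit save_path is given
        self.save_path = os.path.join(save_path or log_dir, "best_model")
        self.best_mean_reward = -np.inf

    def _on_step(self) -> bool:
        if self.n_calls % self.check_freq == 0:
            # read the Monitor logs written to log_dir
            x, y = ts2xy(load_results(self.log_dir), "timesteps")
            if len(y) > 0:
                mean_reward = np.mean(y[-100:])
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    self.model.save(self.save_path)
        return True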
Example #2
# Train a DQN agent on an Atari environment with Stable-Baselines3
import gym 

from stable_baselines3.common.env_util import make_atari_env 
from stable_baselines3.common.vec_env import VecFrameStack 
from stable_baselines3 import DQN

env = make_atari_env('Assault-v0', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=1)  # n_stack=4 is the usual Atari setting; 1 keeps only the current frame

model = DQN('CnnPolicy', env, verbose=1, tensorboard_log="./DQN_log/") 
model.learn(total_timesteps=int(4e4)) 

obs = env.reset()

while True:
    # model.predict accepts the VecEnv's channel-last image observation directly,
    # so no manual transpose is needed; predict from the latest observation each step
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
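
The rendering loop above never terminates. As a hedged addition that is not part of the original snippet, Stable-Baselines3's evaluate_policy helper gives a bounded evaluation instead:

from stable_baselines3.common.evaluation import evaluate_policy

# average return over 10 evaluation episodes
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")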
Example #3
import gym
from stable_baselines3 import DQN

env = gym.make('CartPole-v0')

model = DQN.load("dqn_cartpole")

obs = env.reset()
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        obs = env.reset()
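
Example #3 assumes a saved agent named dqn_cartpole already exists. A minimal training run that would produce such a file could look like the following sketch (hyperparameters are illustrative, not taken from the original):

import gym
from stable_baselines3 import DQN

env = gym.make('CartPole-v0')
model = DQN("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=50000)
model.save("dqn_cartpole")  # writes dqn_cartpole.zip for Example #3 to load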
Example #4
import gym
from stable_baselines3 import DQN
from stable_baselines3.dqn import MlpPolicy
import gym_rock_paper_scissors
from gym_rock_paper_scissors.utils.eval import eval_rock_paper_scissors_agent
import os

# install custom env using pip first

sequence_env = gym.make("RockPaperScissorsSequencePolicy2Env-v0",
                        other_sequence=True)
random_env = gym.make("RockPaperScissorsRandomPolicyEnv-v0")
biased_env = gym.make("RockPaperScissorsBiasedPolicyEnv-v0")

agent = DQN(MlpPolicy, sequence_env, verbose=1)
agent.learn(total_timesteps=80000, log_interval=4)
agent.save("dqn_rps")

del agent

agent = DQN.load("dqn_rps")
score = eval_rock_paper_scissors_agent(agent, sequence_env)
print(score)

os.remove("dqn_rps.zip")
Example #5
import os
from datetime import datetime

import gym

from stable_baselines3 import DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import plot_results, X_TIMESTEPS

from utils import SaveOnBestTrainingRewardCallback  # custom callback, see the sketch after Example #1

# setup
CHECKPOINT_STR = datetime.now().strftime("%Y.%m.%d-%H:%M:%S")
CHECKPOINT_DIR = "checkpoints/" + CHECKPOINT_STR
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

env_name = "CartPole-v1"
env = gym.make(env_name)
env = Monitor(env, os.path.join(
    CHECKPOINT_DIR,
    "training_progress"))  # this monitors the training for later inspection

# "MlpPolicy" is a policy alias predefined for DQN; see stable_baselines3/dqn/policies.py
policy_name = "MlpPolicy"
model = DQN(policy_name, env, verbose=1)

# callback for model training:
# saves a checkpoint whenever the current model improves on the best mean training reward seen so far
callback = SaveOnBestTrainingRewardCallback(check_freq=1000,
                                            log_dir=CHECKPOINT_DIR,
                                            save_path=CHECKPOINT_DIR)

# training
total_timesteps = 150000
model.learn(total_timesteps=total_timesteps, callback=callback)

# we don't have to save manually when we use the callback in the model.learn call
# model.save(os.path.join(CHECKPOINT_DIR, "mlp_dqn_cartpole"))

plot_results([CHECKPOINT_DIR], total_timesteps, X_TIMESTEPS, env_name)
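
If the callback writes a best_model.zip into CHECKPOINT_DIR, as the callback sketch after Example #1 does, the best checkpoint can be reloaded afterwards; the file name is an assumption tied to that sketch:

# load the best checkpoint saved by the callback (file name assumed from the sketch above)
best_model = DQN.load(os.path.join(CHECKPOINT_DIR, "best_model"))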
Example #6
from stable_baselines3 import DQN
from stable_baselines3.common.sb2_compat.rmsprop_tf_like import RMSpropTFLike


def test_dqn_custom_policy():
    policy_kwargs = dict(optimizer_class=RMSpropTFLike, net_arch=[32])
    _ = DQN("MlpPolicy", "CartPole-v1", policy_kwargs=policy_kwargs,
            learning_starts=100).learn(300)
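
The test above only verifies that a custom policy configuration builds and trains for a few steps. The same policy_kwargs mechanism accepts other options; the following sketch uses illustrative values (not from the test) for a larger network and explicit optimizer settings:

from stable_baselines3 import DQN
from stable_baselines3.common.sb2_compat.rmsprop_tf_like import RMSpropTFLike

# illustrative values: two hidden layers of 64 units and a TF-style RMSprop epsilon
policy_kwargs = dict(
    net_arch=[64, 64],
    optimizer_class=RMSpropTFLike,
    optimizer_kwargs=dict(eps=1e-5),
)
model = DQN("MlpPolicy", "CartPole-v1", policy_kwargs=policy_kwargs, learning_starts=100)
model.learn(total_timesteps=300)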
Example #7
import highway_env

# env = gym.make("highway-v0")

from stable_baselines3.common.env_util import make_vec_env

# Parallel environments
env = make_vec_env("highway-v0", n_envs=1)

#############################AGENT############################################

from stable_baselines3 import DQN

# model = DQN.load("MyAutonomousDrivingAgent") # use an existing model, if available

model = DQN("MlpPolicy", env, verbose=2)
model.learn(total_timesteps=10, log_interval=1)

# from stable_baselines3 import PPO
# model = PPO(MlpPolicy, env, verbose=1)
# model.learn(total_timesteps=100,  log_interval=10)

# model.save("MyAutonomousDrivingAgent")
# del model # remove the model

##############################OBSERVATION######################################

observation = env.reset()
done = False
while not done:
    action, _states = model.predict(observation)