"""Evaluate a trained (double) DQN agent on CartPole-v0 and save the rewards."""
import json
import os

import gym  # bug fix: `gym` was used below but never imported
import numpy as np

from agent.dqn_agent import DQNAgent  # bug fix: DQNAgent was never imported
from agent.networks import MLP
from train_cartpole import run_episode

np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # Load the trained double-DQN agent.
    states_dim = 4   # CartPole observation: [x, x_dot, theta, theta_dot]
    action_dim = 2   # push left / push right
    Q = MLP(states_dim, action_dim)
    Q_target = MLP(states_dim, action_dim)
    agent = DQNAgent(Q, Q_target, action_dim, double=True)
    agent.load("./models_cartpole/dqn_agent_fixed_1.pt")

    n_test_episodes = 15

    # Run greedy (deterministic) evaluation episodes without training updates.
    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True,
                            do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # Save results in a dictionary and write them into a .json file
    # (bug fix: the original stopped at this comment and never wrote anything).
    results = dict()
    results["episode_rewards"] = episode_rewards
    # float(...) so the values are JSON-serializable (np.float64 is not).
    results["mean"] = float(np.mean(episode_rewards))
    results["std"] = float(np.std(episode_rewards))

    if not os.path.exists("./results"):
        os.mkdir("./results")
    with open("./results/cartpole_results_dqn-eval.json", "w") as fh:
        json.dump(results, fh)

    env.close()
# Train a (double) DQN agent on CartPole-v0.
# Hint: CartPole is considered solved when the average reward is greater than
# or equal to 195.0 over 100 consecutive trials.
# NOTE(review): this fragment relies on `gym`, `MLP`, `MLP_Duel`, `DQNAgent`
# and `train_online` being imported elsewhere in the file — confirm.
env = gym.make("CartPole-v0").unwrapped
# import pdb; pdb.set_trace()

state_dim = 4    # CartPole observation: [x, x_dot, theta, theta_dot]
num_actions = 2  # push left / push right

# 1. init Q network and target network (see dqn/networks.py)
# 2. init DQNAgent (see dqn/dqn_agent.py)
# 3. train DQN agent with train_online(...)

# Duelling DQN or not.
use_duelling = False
num_episodes = 2000

if use_duelling:
    Q = MLP_Duel(state_dim, num_actions)
    Q_target = MLP_Duel(state_dim, num_actions)
else:
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)

# Bug fix: the original bound the instance to the name `DQNAgent`, shadowing
# the class itself. Also pass the replay-buffer capacity as an int — 1e6 is a
# float literal and a buffer length is an integer count.
agent = DQNAgent(Q, Q_target, num_actions, double=True,
                 history_length=int(1e6))
train_online(env, agent, num_episodes, epsilon_decay=False)
if __name__ == "__main__":
    # You find information about cartpole in
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater
    # than or equal to 195.0 over 100 consecutive trials.
    # NOTE(review): relies on `argparse`, `gym`, `MLP`, `DQNAgent` and
    # `train_online` being imported elsewhere in the file — confirm.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--interrupt", action='store_true',
                        help="Save model if interrupted", default=False)
    parser.add_argument('-e', "--episodes", type=int,
                        help="num episodes to try", default=500)
    parser.add_argument('-s', "--steps", type=int,
                        help="num steps per episode", default=200)
    parser.add_argument("-r", "--render", action='store_true',
                        help="render during training and evaluation",
                        default=False)
    args = parser.parse_args()
    print(args)

    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4    # CartPole observation: [x, x_dot, theta, theta_dot]
    num_actions = 2  # push left / push right

    # 1. init Q network and target network (see dqn/networks.py)
    Q_network = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)

    # 2. init DQNAgent (see dqn/dqn_agent.py).
    # Bug fix: pass the replay-buffer capacity as an int — 1e5 is a float
    # literal and a buffer length is an integer count.
    agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=num_actions,
                     buffer_size=int(1e5), lr=1e-4)

    # 3. train DQN agent with train_online(...)
    train_online(env=env, agent=agent, num_episodes=args.episodes,
                 max_timesteps=args.steps, eval_cycle=20, num_eval_episodes=5,
                 rendering=args.render, tensorboard_dir='./tensorboard',
                 save_interrupt=args.interrupt)
"""Evaluate a trained DQN agent on CartPole-v0 and dump rewards to JSON."""
import json
import os

import gym  # bug fix: `gym` was used below but never imported
import numpy as np
import torch

from agent.dqn_agent import DQNAgent
from agent.networks import *
from agent.networks import MLP
from train_cartpole import run_episode

np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # Load the trained DQN agent.
    Q = MLP(state_dim=4, action_dim=2)
    Q_target = MLP(state_dim=4, action_dim=2)
    agent = DQNAgent(Q, Q_target, num_actions=2)
    agent.load("./models_cartpole/dqn_agent-perfect.pt")

    n_test_episodes = 15

    # Greedy evaluation: deterministic policy, no exploration, no training.
    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False,
                            rendering=True, epsilon=0)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    # float(...) so the values are JSON-serializable (np.float64 is not).
    results["mean"] = float(np.mean(episode_rewards))
    results["std"] = float(np.std(episode_rewards))

    # Bug fix: `json` was imported but the results were never written out.
    if not os.path.exists("./results"):
        os.mkdir("./results")
    with open("./results/cartpole_results_dqn-eval.json", "w") as fh:
        json.dump(results, fh)

    env.close()