import json

import gym
import numpy as np

from agent.dqn_agent import DQNAgent
from agent.networks import MLP
from train_cartpole import run_episode

np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # load the trained DQN agent
    state_dim = 4
    action_dim = 2

    Q = MLP(state_dim, action_dim)
    Q_target = MLP(state_dim, action_dim)
    agent = DQNAgent(Q, Q_target, action_dim, double=True)
    agent.load("./models_cartpole/dqn_agent_fixed_1.pt")
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
    # the output path is an assumption; adjust to your results directory
    with open("./results/cartpole_results_dqn.json", "w") as f:
        json.dump(results, f)
    env.close()
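
# run_episode above drives the agent with deterministic=True; as a minimal
# sketch of the usual epsilon-greedy selection inside such an agent (the
# function name here is hypothetical; the real logic lives in agent/dqn_agent.py):
def epsilon_greedy_action(q_values, epsilon=0.1, deterministic=False):
    # q_values: 1-D numpy array of per-action Q estimates
    if deterministic or np.random.rand() > epsilon:
        return int(np.argmax(q_values))      # exploit: greedy action
    return np.random.randint(len(q_values))  # explore: uniform random action
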
# Example #2
# NOTE: module paths below follow the "see dqn/..." hints in this example;
# train_online is assumed to be importable from the training script
import gym

from dqn.dqn_agent import DQNAgent
from dqn.networks import MLP, MLP_Duel
from train_cartpole import train_online

if __name__ == "__main__":
    # Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.

    env = gym.make("CartPole-v0").unwrapped
    state_dim = 4
    num_actions = 2

    # TODO:
    # 1. init Q network and target network (see dqn/networks.py)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    # 3. train DQN agent with train_online(...)

    # toggle the dueling DQN architecture (see the sketch after this example)
    Duel = False

    num_episodes = 2000

    if Duel:
        Q = MLP_Duel(state_dim, num_actions)
        Q_target = MLP_Duel(state_dim, num_actions)
    else:
        Q = MLP(state_dim, num_actions)
        Q_target = MLP(state_dim, num_actions)

    # do not shadow the DQNAgent class with the instance
    agent = DQNAgent(Q,
                     Q_target,
                     num_actions,
                     double=True,
                     history_length=int(1e6))
    train_online(env, agent, num_episodes, epsilon_decay=False)
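
# Example #2 above toggles MLP_Duel from dqn/networks.py; as a minimal sketch
# (class name, depth, and hidden width are illustrative assumptions, not the
# repo's actual code), a dueling head decomposes
# Q(s, a) = V(s) + A(s, a) - mean_a A(s, a):
import torch.nn as nn

class DuelingMLPSketch(nn.Module):
    def __init__(self, state_dim, num_actions, hidden=128):
        super().__init__()
        self.body = nn.Sequential(nn.Linear(state_dim, hidden), nn.ReLU())
        self.value = nn.Linear(hidden, 1)                # state-value stream V(s)
        self.advantage = nn.Linear(hidden, num_actions)  # advantage stream A(s, a)

    def forward(self, x):
        h = self.body(x)
        v = self.value(h)       # shape (batch, 1), broadcasts over actions
        a = self.advantage(h)   # shape (batch, num_actions)
        # subtracting the mean advantage keeps V and A identifiable
        return v + a - a.mean(dim=1, keepdim=True)
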
# Example #3
# NOTE: module paths below follow the "see dqn/..." hints in this example;
# train_online is assumed to be importable from the training script
import argparse

import gym

from dqn.dqn_agent import DQNAgent
from dqn.networks import MLP
from train_cartpole import train_online

if __name__ == "__main__":
    # You can find information about CartPole at
    # https://github.com/openai/gym/wiki/CartPole-v0
    # Hint: CartPole is considered solved when the average reward is greater than or equal to 195.0 over 100 consecutive trials.
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--interrupt", action='store_true', help="Save model if interrupted",
                        default=False, required=False)
    parser.add_argument('-e', "--episodes", type=int, help="num episodes to try", default=500, required=False)
    parser.add_argument('-s', "--steps", type=int, help="num steps per episode", default=200, required=False)
    parser.add_argument("-r", "--render", action='store_true', help="render during training and evaluation",
                        default=False, required=False)
    args = parser.parse_args()
    print(args)

    env = gym.make("CartPole-v0").unwrapped

    state_dim = 4
    num_actions = 2

    # TODO: 
    # 1. init Q network and target network (see dqn/networks.py)
    Q_network = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    # 2. init DQNAgent (see dqn/dqn_agent.py)
    agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=num_actions,
                     buffer_size=int(1e5), lr=1e-4)
    # 3. train DQN agent with train_online(...)
    train_online(env=env, agent=agent, num_episodes=args.episodes, max_timesteps=args.steps,
                 eval_cycle=20, num_eval_episodes=5, rendering=args.render,
                 tensorboard_dir='./tensorboard', save_interrupt=args.interrupt)
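
# Examples #1 and #2 pass double=True to DQNAgent; as a minimal sketch of the
# standard Double DQN target (the function name and tensor layout here are
# assumptions; the authoritative update lives in the repo's dqn_agent.py):
import torch

def double_dqn_target(Q, Q_target, rewards, next_states, dones, gamma=0.99):
    # y = r + gamma * Q_target(s', argmax_a Q(s', a)) for non-terminal s'
    with torch.no_grad():
        best_actions = Q(next_states).argmax(dim=1, keepdim=True)          # online net selects
        next_q = Q_target(next_states).gather(1, best_actions).squeeze(1)  # target net evaluates
    return rewards + gamma * (1.0 - dones) * next_q
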
# Example #4
import json

import gym
import numpy as np
import torch

from agent.dqn_agent import DQNAgent
from agent.networks import MLP
from train_cartpole import run_episode

np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # load the trained DQN agent
    Q = MLP(state_dim=4, action_dim=2)
    Q_target = MLP(state_dim=4, action_dim=2)
    agent = DQNAgent(Q, Q_target, num_actions=2)
    agent.load("./models_cartpole/dqn_agent-perfect.pt")
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True, epsilon=0)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
    # the output path is an assumption; adjust to your results directory
    with open("./results/cartpole_results_dqn.json", "w") as f:
        json.dump(results, f)
    env.close()