# glob/os/json/datetime are needed below; the model classes (ResnetVariant,
# LeNetVariant, DeepQNetwork), DQNAgent, run_episode and the torch `device`
# are assumed to come from the surrounding project modules.
import glob
import json
import os
from datetime import datetime

import gym
import numpy as np


def main(model_path, n_test_episodes):
    run_paths = glob.glob(os.path.join(model_path, '*'))
    for run_path in run_paths:
        if len(glob.glob(os.path.join(run_path, 'carracing_results*'))) > 0:
            print(run_path, 'already processed')
            continue
        # Load run config
        with open(os.path.join(run_path, 'config.json'), 'r') as f:
            run_config = json.load(f)
        env = gym.make("CarRacing-v0").unwrapped

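        # The CarRacing action space is discretized into 5 actions in this project.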
        num_actions = 5

        # Define networks and load agent
        if run_config['model'] == 'Resnet':
            Q_net = ResnetVariant(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = ResnetVariant(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(
                device)
        elif run_config['model'] == 'Lenet':
            Q_net = LeNetVariant(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = LeNetVariant(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(
                device)
        elif run_config['model'] == 'DeepQNetwork':
            Q_net = DeepQNetwork(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(device)
            Q_target_net = DeepQNetwork(num_actions=num_actions, history_length=run_config['history_length'] + 1).to(
                device)
        else:
            raise ValueError('{} not implemented.'.format(run_config['model']))

        agent = DQNAgent(Q=Q_net, Q_target=Q_target_net, num_actions=num_actions, **run_config)
        agent.load(os.path.join(run_path, 'agent.pt'))

        episode_rewards = []
        for i in range(n_test_episodes):
            stats = run_episode(env, agent, deterministic=True, history_length=run_config['history_length'],
                                do_training=False, rendering=True, normalize_images=run_config['normalize_images'],
                                skip_frames=run_config['skip_frames'], max_timesteps=1000)
            episode_rewards.append(stats.episode_reward)

        # save results in a dictionary and write them into a .json file
        results = dict()
        results["episode_rewards"] = episode_rewards
        results["mean"] = np.array(episode_rewards).mean()
        results["std"] = np.array(episode_rewards).std()
        fname = "{}/carracing_results_dqn-{}.json".format(run_path, datetime.now().strftime("%Y%m%d-%H%M%S"))
        fh = open(fname, "w")
        json.dump(results, fh)
        fh.close()

        env.close()
        print('... finished')
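
A minimal entry point for main() might look like the following sketch; the flag names and defaults are assumptions rather than part of the original script.

if __name__ == "__main__":
    import argparse

    # Hypothetical CLI: both flag names and defaults are assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, default="./models_carracing",
                        help="directory with one subdirectory per training run")
    parser.add_argument("--n_test_episodes", type=int, default=15,
                        help="number of evaluation episodes per run")
    args = parser.parse_args()

    main(args.model_path, args.n_test_episodes)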
Example 2
np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # Load DQN agent
    state_dim = 4
    num_actions = 2
    
    Q = MLP(state_dim, num_actions)
    Q_target = MLP(state_dim, num_actions)
    agent = DQNAgent(Q, Q_target, num_actions)
    agent.load("C:\\Users\\Monish\\Desktop\\workspace\\exercise3_R\\reinforcement_learning\\models_cartpole\\dqn_agent_199.pt")
 
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
 
    if not os.path.exists("./results"):
        os.mkdir("./results")
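
The results-writing step follows the same pattern as the CarRacing example above; a plausible continuation (the filename, and the datetime/json imports it relies on, are assumptions) is:

    # Sketch only: mirrors the CarRacing example; fname is an assumption.
    fname = "./results/cartpole_results_dqn-{}.json".format(
        datetime.now().strftime("%Y%m%d-%H%M%S"))
    with open(fname, "w") as fh:
        json.dump(results, fh)

    env.close()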
Example 3
def setup_ai(model_path):
    agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, None, None, None)
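    # Small epsilon keeps evaluation mostly greedy, with a touch of exploration.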
    agent.epsilon = 0.01
    agent.load(model_path)
    return DQNAgentWrapper(agent, STACK_SIZE)
np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # Load DQN agent
    states_dim = 4
    action_dim = 2

    Q = MLP(states_dim, action_dim)
    Q_target = MLP(states_dim, action_dim)
    agent = DQNAgent(Q, Q_target, action_dim, double=True)
    agent.load("./models_cartpole/dqn_agent_fixed_1.pt")
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
        stats = run_episode(env,
                            agent,
                            deterministic=True,
                            do_training=False,
                            rendering=True)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
Example 5
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, help="Model file to use", required=True)
    parser.add_argument('-e', "--episodes", type=int, help="num episodes to try", default=5, required=False)
    args = parser.parse_args()

    env = gym.make("CarRacing-v0").unwrapped

    history_length = 5

    # Define networks and load agent
    Q_network = CNN(history_length=history_length, n_classes=5)
    Q_target = CNN(history_length=history_length, n_classes=5)
    agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=5)
    agent.load(args.model)

    episode_rewards = []
    for i in range(args.episodes):
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True, history_length=history_length)
        episode_rewards.append(stats.episode_reward)
        print('Episode %d - [ Reward %.2f ]' % (i+1, stats.episode_reward))

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
 
    if not os.path.exists("./results"):
        os.mkdir("./results")  
Example 6
def setup_ai(model_path):
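    # Positional args follow this project's DQNAgent constructor; the Nones are
    # presumably training-only arguments unused at inference time, and 0.95 is
    # presumably a discount factor.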
    agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, EPSILON, None, None,
                     0.95)
    agent.load(model_path)
    return DQNAgentWrapper(agent, STACK_SIZE)
Example 7
def setup_ai(model_path):
    agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, EPSILON, None, None, 0.95)
    agent.load(model_path)
    return DQNAgentWrapper(agent, STACK_SIZE)
import os

import gym
import numpy as np
import torch

# DQNAgent and run_episode are assumed to be imported from the surrounding project.
from agent.networks import MLP
np.random.seed(0)

if __name__ == "__main__":

    env = gym.make("CartPole-v0").unwrapped

    # Load DQN agent
    Q = MLP(state_dim=4, action_dim=2)
    Q_target = MLP(state_dim=4, action_dim=2)
    agent = DQNAgent(Q, Q_target, num_actions=2)
    agent.load("./models_cartpole/dqn_agent-perfect.pt")
    n_test_episodes = 15

    episode_rewards = []
    for i in range(n_test_episodes):
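        # epsilon=0 presumably disables any residual exploration during evaluation.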
        stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True, epsilon=0)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
    results["std"] = np.array(episode_rewards).std()
 
    if not os.path.exists("./results"):
        os.mkdir("./results")