def main(model_path, n_test_episodes):
    """Evaluate every trained DQN run found under *model_path* on CarRacing-v0.

    For each run sub-directory: load its ``config.json`` and saved agent
    weights, roll out ``n_test_episodes`` deterministic episodes, and write
    the per-episode rewards plus their mean/std to a timestamped
    ``carracing_results_dqn-*.json`` file inside that run directory.
    Runs that already contain a ``carracing_results*`` file are skipped.

    Args:
        model_path: directory containing one sub-directory per training run.
        n_test_episodes: number of evaluation episodes to run per model.
    """
    # Dispatch table replaces the original triplicated if/elif chain that
    # constructed Q_net / Q_target_net per model name.
    model_classes = {
        'Resnet': ResnetVariant,
        'Lenet': LeNetVariant,
        'DeepQNetwork': DeepQNetwork,
    }

    run_paths = glob.glob(os.path.join(model_path, '*'))
    for run_path in run_paths:
        # Skip runs that were already evaluated.
        if len(glob.glob(os.path.join(run_path, 'carracing_results*'))) > 0:
            print(run_path, 'already processed')
            continue

        # Load run config. The with-block fixes the original leaked file
        # handle from json.load(open(...)).
        with open(os.path.join(run_path, 'config.json'), 'r') as cfg_file:
            run_config = json.load(cfg_file)

        env = gym.make("CarRacing-v0").unwrapped
        num_actions = 5

        # Define networks and load agent.
        if run_config['model'] not in model_classes:
            raise ValueError('{} not implmented.'.format(run_config['model']))
        model_cls = model_classes[run_config['model']]
        # Both networks share the same constructor arguments; history gets one
        # extra frame (the current observation) on top of history_length.
        net_kwargs = dict(num_actions=num_actions,
                          history_length=run_config['history_length'] + 1)
        Q_net = model_cls(**net_kwargs).to(device)
        Q_target_net = model_cls(**net_kwargs).to(device)

        agent = DQNAgent(Q=Q_net, Q_target=Q_target_net,
                         num_actions=num_actions, **run_config)
        agent.load(os.path.join(run_path, 'agent.pt'))

        episode_rewards = []
        for i in range(n_test_episodes):
            stats = run_episode(env, agent, deterministic=True,
                                history_length=run_config['history_length'],
                                do_training=False, rendering=True,
                                normalize_images=run_config['normalize_images'],
                                skip_frames=run_config['skip_frames'],
                                max_timesteps=1000)
            episode_rewards.append(stats.episode_reward)

        # Save results in a dictionary and write them into a .json file.
        # The array is built once (original built it twice) and the numpy
        # scalars are cast to float so json.dump is guaranteed to accept them.
        rewards = np.array(episode_rewards)
        results = dict()
        results["episode_rewards"] = episode_rewards
        results["mean"] = float(rewards.mean())
        results["std"] = float(rewards.std())
        fname = "{}/carracing_results_dqn-{}.json".format(
            run_path, datetime.now().strftime("%Y%m%d-%H%M%S"))
        # with-block fixes the handle that stayed open if json.dump raised.
        with open(fname, "w") as fh:
            json.dump(results, fh)

        env.close()

    print('... finished')
# CartPole-v0 evaluation script: loads a trained MLP-based DQN agent from a
# hard-coded absolute Windows path, runs 15 deterministic test episodes with
# rendering, and collects per-episode rewards (plus mean/std) into a results
# dict.
# NOTE(review): this chunk is truncated — it ends mid-statement at a dangling
# `if not os.path.exists("./results"):` with no body; the remainder
# (presumably mkdir + json dump of `results`) is not visible here, so the
# code is kept byte-identical rather than rewritten.
np.random.seed(0) if __name__ == "__main__": env = gym.make("CartPole-v0").unwrapped # TODO: load DQN agent # ... state_dim = 4 num_actions = 2 Q = MLP(state_dim, num_actions) Q_target = MLP(state_dim, num_actions) agent = DQNAgent(Q, Q_target, num_actions) agent.load("C:\\Users\\Monish\\Desktop\\workspace\\exercise3_R\\reinforcement_learning\\models_cartpole\\dqn_agent_199.pt") n_test_episodes = 15 episode_rewards = [] for i in range(n_test_episodes): stats = run_episode(env, agent, deterministic=True, do_training=False, rendering=True) episode_rewards.append(stats.episode_reward) # save results in a dictionary and write them into a .json file results = dict() results["episode_rewards"] = episode_rewards results["mean"] = np.array(episode_rewards).mean() results["std"] = np.array(episode_rewards).std() if not os.path.exists("./results"):
def setup_ai(model_path):
    """Restore a trained DQN from *model_path* and wrap it for game play.

    The agent is built without replay/target machinery (the trailing ``None``
    constructor arguments) and pinned to a small exploration rate, then
    wrapped in a ``DQNAgentWrapper`` that handles frame stacking.
    """
    dqn = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, None, None, None)
    # Near-greedy behaviour at evaluation time.
    dqn.epsilon = 0.01
    dqn.load(model_path)
    return DQNAgentWrapper(dqn, STACK_SIZE)
np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    states_dim = 4
    action_dim = 2
    Q = MLP(states_dim, action_dim)
    Q_target = MLP(states_dim, action_dim)
    agent = DQNAgent(Q, Q_target, action_dim, double=True)
    agent.load("./models_cartpole/dqn_agent_fixed_1.pt")

    n_test_episodes = 15

    # Roll out deterministic evaluation episodes (with rendering, no
    # training) and record each episode's return.
    episode_rewards = [
        run_episode(env, agent, deterministic=True,
                    do_training=False, rendering=True).episode_reward
        for _ in range(n_test_episodes)
    ]

    # save results in a dictionary and write them into a .json file
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = np.array(episode_rewards).mean()
# Command-line evaluation of a trained CarRacing DQN: load the given model
# file, play a number of deterministic episodes, and collect reward stats.
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", type=str, help="Model file to use", required=True)
parser.add_argument('-e', "--episodes", type=int, help="num episodes to try", default=5, required=False)
args = parser.parse_args()

env = gym.make("CarRacing-v0").unwrapped
history_length = 5

#TODO: Define networks and load agent
# ....
Q_network = CNN(history_length=history_length, n_classes=5)
Q_target = CNN(history_length=history_length, n_classes=5)
agent = DQNAgent(Q=Q_network, Q_target=Q_target, num_actions=5)
agent.load(args.model)

episode_rewards = []
for episode_idx in range(args.episodes):
    stats = run_episode(env, agent, deterministic=True, do_training=False,
                        rendering=True, history_length=history_length)
    episode_rewards.append(stats.episode_reward)
    print('Episode %d - [ Reward %.2f ]' % (episode_idx + 1, stats.episode_reward))

# save results in a dictionary and write them into a .json file
results = dict()
results["episode_rewards"] = episode_rewards
results["mean"] = np.array(episode_rewards).mean()
results["std"] = np.array(episode_rewards).std()
if not os.path.exists("./results"):
    os.mkdir("./results")
def setup_ai(model_path):
    """Load a trained DQN from *model_path* and return it wrapped for play.

    Construction passes EPSILON and a 0.95 discount; the replay/target
    machinery arguments are left as ``None``.
    """
    dqn_agent = DQNAgent(STATE_SIZE, N_ACTIONS, N_AGENTS, EPSILON, None, None, 0.95)
    dqn_agent.load(model_path)
    return DQNAgentWrapper(dqn_agent, STACK_SIZE)
from agent.networks import *
import numpy as np
from agent.networks import MLP
import torch

np.random.seed(0)

if __name__ == "__main__":
    env = gym.make("CartPole-v0").unwrapped

    # TODO: load DQN agent
    # ...
    Q = MLP(state_dim=4, action_dim=2)
    Q_target = MLP(state_dim=4, action_dim=2)
    agent = DQNAgent(Q, Q_target, num_actions=2)
    agent.load("./models_cartpole/dqn_agent-perfect.pt")

    n_test_episodes = 15
    episode_rewards = []
    for _ in range(n_test_episodes):
        # Greedy rollout: deterministic policy with epsilon forced to zero.
        stats = run_episode(env, agent, deterministic=True, do_training=False,
                            rendering=True, epsilon=0)
        episode_rewards.append(stats.episode_reward)

    # save results in a dictionary and write them into a .json file
    rewards = np.array(episode_rewards)
    results = dict()
    results["episode_rewards"] = episode_rewards
    results["mean"] = rewards.mean()
    results["std"] = rewards.std()
    if not os.path.exists("./results"):
        os.mkdir("./results")