# HW2 driver script. The imports below are assumptions (they do not appear in
# the original snippet); the module and alias names are inferred from the HW3
# driver further down, which imports from agent_class and build_environment.
import matplotlib.pyplot as plt

import build_environment as env
from agent_class import HW2Agent


def part2b():
    # Value-iteration agent; the three scalar hyperparameters match the
    # original call (their exact meanings depend on HW2Agent's signature).
    Agent = HW2Agent(env.build_map(), env.STATEMAP, 0.5, 0.95, 0.01)
    Agent.value_iteration()
    print("Agent B:")
    print("\tNum evaluations: " + str(Agent.num_evals))
    print(Agent.values)
    # Overlay the greedy path on the map, then show the value function.
    x, y = Agent.generate_best_path()
    plt.imshow(env.build_map())
    plt.scatter(y, x)  # scatter expects (col, row) order for image coordinates
    plt.show()
    plt.imshow(Agent.values)
    plt.show()
def part2c():
    # Same value-iteration agent with a lower discount (0.55); the path is
    # sampled stochastically with a step cutoff so a non-terminating policy
    # cannot loop forever.
    Agent = HW2Agent(env.build_map(), env.STATEMAP, 0.02, 0.55, 0.01)
    Agent.value_iteration()
    print("Agent C:")
    print("\tNum evaluations: " + str(Agent.num_evals))
    print(Agent.values)
    x, y = Agent.generate_best_path(stochastic=True, cutoff=1000)
    plt.imshow(env.build_map())
    plt.scatter(y, x)
    plt.show()
    plt.imshow(Agent.values)
    plt.show()
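# For orientation, the textbook update that Agent.value_iteration() performs
# is sketched below. This is a generic, deterministic-transition illustration
# of value iteration, NOT HW2Agent's actual implementation (whose internals
# are not shown here); states, actions, transition, and reward are
# placeholder callables.
def value_iteration_sketch(states, actions, transition, reward,
                           gamma=0.95, theta=0.01):
    """Iterate V(s) <- max_a [R(s, a, s') + gamma * V(s')] until the largest
    per-state change falls below theta, then return the value table."""
    V = {s: 0.0 for s in states}
    while True:
        delta = 0.0
        for s in states:
            best = max(reward(s, a, transition(s, a))
                       + gamma * V[transition(s, a)]
                       for a in actions)
            delta = max(delta, abs(best - V[s]))
            V[s] = best  # in-place (Gauss-Seidel) update
        if delta < theta:
            return V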
def part1a():
    # Policy-iteration agent; reports both improvement and evaluation counts.
    Agent = HW2Agent(env.build_map(), env.STATEMAP, 0.02, 0.95, 0.01)
    Agent.policy_iteration()
    print("Agent A:")
    print("\tNum improvements: " + str(Agent.num_improvements))
    print("\tNum evaluations: " + str(Agent.num_evals))
    print(Agent.values)
    x, y = Agent.generate_best_path()
    plt.imshow(env.build_map())
    plt.scatter(y, x)
    plt.show()
    plt.imshow(Agent.values)
    plt.show()
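# A minimal entry point, sketched as an assumption: the original snippet
# defines the part functions but never calls them (it may have been driven
# from a REPL instead).
if __name__ == '__main__':
    part1a()
    part2b()
    part2c()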
# HW3 driver script: on-policy Monte Carlo agent sweep.
import matplotlib.pyplot as plt
from math import inf

from agent_class import HW3Agent
from build_environment import build_map, STATEMAP

MAP = build_map()


def test(episode_len, num_episodes, epsilon, default_r=-1):
    # Train an on-policy Monte Carlo agent (epsilon presumably controls
    # exploration), then sample a path from the learned policy and plot it.
    Agent = HW3Agent(MAP, STATEMAP, epsilon, default_r=default_r)
    Agent.on_policy_mc(episode_len, num_episodes)
    y, x = Agent.generate_best_path(cutoff=1000, stochastic=True)
    # A path that hits the 1000-step cutoff is reported as non-terminating.
    pathlen = len(x) if len(x) < 1000 else 'INF'
    msg = "%d episodes with %d steps: %s" % (num_episodes, episode_len, str(pathlen))
    print(msg)
    plt.imshow(MAP)
    plt.scatter(x, y)
    title = "%d episodes of length %d" % (num_episodes, episode_len)
    plt.suptitle(title, fontsize=15)
    plt.title(msg)
    #plt.show()


# Sweep over episode counts and lengths (left commented out in the original):
'''
for episodes in range(1, 5):
    for elen in range(1, 5):
        test(10 ** elen, 10 ** episodes, 0.02, default_r=-inf)
'''
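# Hypothetical single run (not in the original): one mid-sized configuration
# from the commented-out sweep above, with the plot shown explicitly here
# since plt.show() is commented out inside test().
if __name__ == '__main__':
    test(10 ** 3, 10 ** 3, 0.02, default_r=-inf)
    plt.show()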