def question_1():
    """Run the learning-curve experiment, then a single long run for the 3D plot.

    Phase one performs 50 independent runs of 200 episodes each.  The number
    of steps of every episode is stored in a ``(runs, episodes)`` array that
    is written with ``np.save('steps', ...)``, after which ``plotGraph()`` is
    called.

    Phase two discards the first agent/environment/glue objects, builds new
    ones, performs one run of 1000 episodes (printing the episode index each
    time) and finally sends the ``"plot3DGraph"`` message to the agent.  The
    step counts of phase two are recorded but not saved.
    """
    step_cap = 100000

    # ---- Phase one: 50 runs x 200 episodes -------------------------------
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    run_count, episode_count = 50, 200
    steps = np.zeros([run_count, episode_count])
    for run in range(run_count):
        print("run number : ", run)
        rlglue.rl_init()
        for episode in range(episode_count):
            rlglue.rl_episode(step_cap)
            steps[run, episode] = rlglue.num_ep_steps()

    np.save('steps', steps)
    plotGraph()

    # ---- Phase two: 1 run x 1000 episodes --------------------------------
    # Drop the phase-one objects before creating their replacements.
    del agent, environment, rlglue
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    run_count, episode_count = 1, 1000
    steps = np.zeros([run_count, episode_count])
    for run in range(run_count):
        print("run number : ", run)
        rlglue.rl_init()
        for episode in range(episode_count):
            print("Episode number: " + str(episode))
            rlglue.rl_episode(step_cap)
            steps[run, episode] = rlglue.num_ep_steps()

    rlglue.rl_agent_message("plot3DGraph")
def question_3():
    """Train for 1000 episodes, then dump the cost-to-go surface on a 50x50 grid.

    After training, the agent is asked (message ``"ValueFunction"``) for its
    weight vector ``w`` and index hash table ``iht``.  For every grid point
    the action values of the three actions are computed by summing the
    weights of the active tiles, and ``-max_a q(s, a)`` is written to the
    text file ``value`` (one row per position index, space separated) and
    stored in ``Q[velocity_index][position_index]``, which is saved with
    ``np.save("value", Q)``.

    Changes relative to the previous version:
      * the output file is managed by ``with`` so it is closed even if the
        tile coder or the agent raises;
      * the height is ``-max_a q`` instead of ``max_a (-q)`` (which equals
        ``-min_a q``), matching the other cost-to-go routines in this file;
      * the dead ``i = 0`` initialisation was removed.
    """
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 1000000

    for _ in range(num_runs):
        rlglue.rl_init()
        for episode in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            print(episode)

    steps = 50          # grid resolution along each state dimension
    num_actions = 3
    num_tilings = 8

    w, iht = rlglue.rl_agent_message("ValueFunction")
    Q = np.zeros([steps, steps])

    with open('value', 'w') as fout:
        for i in range(steps):
            # Position in [-1.2, 0.5), rescaled so one unit spans 1/8 of the range.
            position = 8 * (-1.2 + (i * 1.7 / steps)) / 1.7
            for j in range(steps):
                # Velocity in [-0.07, 0.07), rescaled the same way.
                velocity = 8 * (-0.07 + (j * 0.14 / steps)) / 0.14
                action_values = []
                for a in range(num_actions):
                    active = tiles(iht, num_tilings, [position, velocity], [a])
                    action_values.append(sum(w[index] for index in active))
                # Cost-to-go is the negated value of the greedy action.
                height = -max(action_values)
                fout.write(repr(height) + ' ')
                Q[j][i] = height
            fout.write('\n')

    np.save("value", Q)
def question_3():
    """Train for 1000 episodes and plot the learned cost-to-go surface.

    After training, the weights are requested from the agent and
    ``-max_a q(s, a)`` is evaluated on a ``steps x steps`` grid over position
    ``[-1.2, 0.5)`` and velocity ``[-0.07, 0.07)``.  The values are written to
    the text file ``value`` and drawn as a 3D surface, which is saved to
    ``cost-to-go.png`` and shown.

    Changes relative to the previous version:
      * the output file is managed by ``with``;
      * the plot axes are built with ``np.linspace(..., endpoint=False)``
        instead of ``np.arange`` with a float step, whose length is not
        guaranteed and could mismatch the shape of ``z``;
      * the grid size is taken from ``steps`` everywhere instead of a
        repeated literal ``50``;
      * the inner comprehension no longer reuses the loop variable name ``i``;
      * the unused ``num_runs`` local was removed.
    """
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    max_eps_steps = 100000
    num_episodes = 1000
    numActions = 3
    steps = 50  # grid resolution along each state dimension

    rlglue.rl_init()
    for _ in range(num_episodes):
        rlglue.rl_episode(max_eps_steps)

    # NOTE(review): the message text is a runtime string, presumably compared
    # verbatim by the agent, so its spelling is left untouched.
    weights = rlglue.rl_agent_message("3D plot of the cast-to-go")

    z = np.zeros((steps, steps))
    with open('value', 'w') as fout:
        for i in range(steps):
            position = 8 * (-1.2 + (i * 1.7 / steps)) / 1.7
            for j in range(steps):
                velocity = 8 * (-0.07 + (j * 0.14 / steps)) / 0.14
                values = []
                for a in range(numActions):
                    inds = agent.get_index([position, velocity], a)
                    values.append(np.sum([weights[idx] for idx in inds]))
                height = max(values)
                z[j][i] = -height
                fout.write(repr(-height) + ' ')
            fout.write('\n')

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    # Same sample points as the grid above: start + k * range / steps.
    x = np.linspace(-1.2, 0.5, steps, endpoint=False)
    y = np.linspace(-0.07, 0.07, steps, endpoint=False)
    x, y = np.meshgrid(x, y)
    ax.set_xticks([-1.2, 0.5])
    ax.set_yticks([0.07, -0.07])
    ax.set_ylabel('Velocity')
    ax.set_xlabel('Position')
    ax.set_zlabel('Cost-To-Go')
    ax.plot_surface(x, y, z)
    plt.savefig('cost-to-go.png')
    plt.show()

    # NOTE(review): this stores the grid resolution (the integer 50), not
    # per-episode step counts; kept for backward compatibility -- confirm
    # whether it is actually wanted.
    np.save('steps', steps)
def part3():
    """Train for 200 episodes, then dump the cost-to-go surface on a 50x50 grid.

    The per-episode step counts are saved with ``np.save('steps', ...)``.
    Afterwards ``-max_a q(s, a)`` is evaluated for every grid point, written
    to the text file ``value`` (one row per position index, space separated)
    and saved as a 2D array with ``np.save('heights', ...)``.

    Fixes relative to the previous version:
      * ``self.action`` / ``self.weights`` were referenced in a plain
        function (``NameError``); the loop's action index and the local
        ``agent`` instance are used instead;
      * the position used the literal ``1`` instead of the loop index ``i``;
      * values were written with an empty separator, fusing the numbers;
      * only the last scalar height was saved; the whole grid is saved now;
      * the grid resolution no longer rebinds ``steps``, and the trailing
        ``np.save('steps', 50)`` that overwrote the step-count array is gone;
      * the output file is managed by ``with``.
    """
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    num_episodes = 200
    num_runs = 1
    max_eps_steps = 100000

    steps = np.zeros([num_runs, num_episodes])
    for r in range(num_runs):
        print("run number : ", r)
        rlglue.rl_init()
        for e in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
            steps[r, e] = rlglue.num_ep_steps()
    np.save('steps', steps)

    grid_size = 50
    num_of_actions = 3
    heights = np.zeros([grid_size, grid_size])

    with open('value', 'w') as fout:
        for i in range(grid_size):
            pos = -1.2 + (i * 1.7 / grid_size)
            for j in range(grid_size):
                vel = -0.07 + (j * 0.14 / grid_size)
                tile = (pos, vel)
                q = []
                for a in range(num_of_actions):
                    # NOTE(review): assumes ``F(state, action)`` returns the
                    # active feature indices and that the agent exposes its
                    # weight vector as ``weights`` -- confirm against Agent.
                    inds = agent.F(tile, a)
                    q.append(np.sum(agent.weights[inds]))
                height = max(q)
                heights[j][i] = -height
                fout.write(repr(-height) + ' ')
            fout.write('\n')

    np.save('heights', heights)
def question_1(num_episodes):
    """Perform one training run and return the steps taken in each episode.

    Args:
        num_episodes: number of episodes to run.

    Returns:
        A 1-D NumPy array of length ``num_episodes`` whose ``k``-th entry is
        the value of ``rlglue.num_ep_steps()`` after episode ``k``.
    """
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    step_cap = 100000  # upper bound on steps passed to every episode
    episode_lengths = np.zeros(num_episodes)

    rlglue.rl_init()
    # tqdm wraps the range to show a progress bar over the episodes.
    for episode in tqdm(range(num_episodes)):
        rlglue.rl_episode(step_cap)
        episode_lengths[episode] = rlglue.num_ep_steps()

    return episode_lengths
def question_1():
    """Run 5 independent runs of 200 episodes and save the steps per episode.

    The result is a ``(5, 200)`` array of ``rlglue.num_ep_steps()`` values,
    written with ``np.save('steps', ...)``.
    """
    run_count = 5
    episode_count = 200
    step_cap = 100000

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    steps = np.zeros([run_count, episode_count])
    for run in range(run_count):
        print("run number : ", run)
        # Re-initialise the experiment at the start of every run.
        rlglue.rl_init()
        for episode in range(episode_count):
            rlglue.rl_episode(step_cap)
            steps[run, episode] = rlglue.num_ep_steps()

    np.save('steps', steps)
def question_2():
    """Train for 1000 episodes and show the surface returned by the agent.

    After training, the agent is sent the ``"plot"`` message.  Its reply is
    indexed as ``[0]`` (x-axis samples), ``[1]`` (y-axis samples) and ``[2]``
    (surface heights), which are drawn with ``plot_surface`` and displayed.

    The 3D axes are created with ``fig.add_subplot(111, projection='3d')``:
    passing keyword arguments to ``Figure.gca`` was deprecated in
    Matplotlib 3.4 and later removed.  The unused ``surf`` local was
    removed as well.
    """
    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    max_eps_steps = 100000
    num_episodes = 1000

    rlglue.rl_init()
    for _ in tqdm(range(num_episodes)):
        rlglue.rl_episode(max_eps_steps)

    q3_plot = rlglue.rl_agent_message("plot")

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    X, Y = np.meshgrid(q3_plot[0], q3_plot[1])
    ax.plot_surface(X, Y, q3_plot[2])
    # Clamp the axes to the first/last sample of each axis vector.
    ax.set_xlim(q3_plot[0][0], q3_plot[0][-1])
    ax.set_ylim(q3_plot[1][0], q3_plot[1][-1])
    plt.show()
def question_3():
    """Train for 1000 episodes, report the elapsed time and plot the result.

    For each run the wall-clock time of the 1000 episodes is printed.  The
    agent is then sent the ``"return info"`` message; its reply is unpacked
    as ``(action_vals, pos, vel)``, the action values are negated and the
    resulting surface is shown over ``pos`` / ``vel``.

    The 3D axes are created with ``fig.add_subplot(111, projection='3d')``:
    passing keyword arguments to ``Figure.gca`` was deprecated in
    Matplotlib 3.4 and later removed.
    """
    num_episodes = 1000
    num_runs = 1
    max_eps_steps = 100000

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    for r in range(num_runs):
        start = time.time()
        print("run number : ", r)
        rlglue.rl_init()
        for _ in range(num_episodes):
            rlglue.rl_episode(max_eps_steps)
        end = time.time()
        print(str(end - start) + " seconds elapsed")

    action_vals, pos, vel = rlglue.rl_agent_message("return info")
    # Flip the sign so the plotted surface is the negated action values.
    action_vals = np.multiply(action_vals, -1)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(pos, vel, action_vals)
    plt.show()
def question_1():
    """Run 50 timed runs of 200 episodes and save the steps per episode.

    For every run the run index and the wall-clock duration of its episodes
    are printed.  The ``(50, 200)`` array of ``rlglue.num_ep_steps()`` values
    is written with ``np.save('steps', ...)`` once all runs are done.
    """
    run_count = 50
    episode_count = 200
    step_cap = 100000

    agent = Agent()
    environment = Environment()
    rlglue = RLGlue(environment, agent)

    steps = np.zeros([run_count, episode_count])
    for run in range(run_count):
        print("run number : ", run)
        t0 = time.time()
        rlglue.rl_init()
        for episode in range(episode_count):
            rlglue.rl_episode(step_cap)
            steps[run, episode] = rlglue.num_ep_steps()
        elapsed = time.time() - t0
        print(str(elapsed) + " seconds elapsed")

    np.save('steps', steps)