# Experiment script: run a Sarsa agent in the windy gridworld through RL-Glue
# and plot the number of completed episodes against the number of time steps.
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): RLGlue, WindygridEnvironment and SarsaAgent are used below but
# are not imported in the visible part of this script -- confirm the imports
# exist (the module names cannot be determined from here).

if __name__ == "__main__":
    max_steps = 8000  # time-step budget; the run stops once this is exceeded
    num_runs = 1

    # Create and pass agent and environment objects to RLGlue
    environment = WindygridEnvironment()
    agent = SarsaAgent()
    rlglue = RLGlue(environment, agent)
    del agent, environment  # don't use these anymore

    for run in range(num_runs):
        episode = []    # episode count reported after each finished episode
        time_step = []  # step count reported after each finished episode
        rlglue.rl_init()

        while True:
            rlglue.rl_episode()
            time_step.append(rlglue.num_steps())
            episode.append(rlglue.num_episodes())
            # Use the configured budget instead of repeating the literal 8000,
            # so changing max_steps actually changes the length of the run.
            if rlglue.num_steps() > max_steps:
                break

        plt.plot(time_step, episode, label="8 actions")

    # One tick every 1000 steps up to (and including) the step budget.
    plt.xticks(np.arange(0, max_steps + 1, 1000))
    plt.xlabel('Time steps')
    plt.ylabel('Episode', rotation=90)
    plt.legend(loc=2)
    plt.show()
    # NOTE: nothing is written to disk by this script; the curve is only shown.
# Experiment script: run a Sarsa agent in the windy environment through
# RL-Glue until the step budget is reached, then plot the episode count
# against the step count (one point per finished episode).
from rl_glue import RLGlue
from windy_env import WindyEnvironment
from sarsa_agent import SarsaAgent
import numpy as np
import matplotlib.pyplot as plt

max_steps = 8000

# Counts reported by RL-Glue, sampled once after every episode.
step_history = []
episode_history = []

# Environment is constructed first, then the agent (arguments are evaluated
# left to right), and both are handed straight to RL-Glue.
glue = RLGlue(WindyEnvironment(), SarsaAgent())
glue.rl_init()

steps_so_far = 0
while steps_so_far < max_steps:
    # max_steps is also passed to rl_episode as its argument.
    glue.rl_episode(max_steps)
    steps_so_far = glue.num_steps()
    episodes_so_far = glue.num_episodes()
    episode_history.append(episodes_so_far)
    step_history.append(steps_so_far)

plt.plot(step_history, episode_history)
plt.xlabel('Time steps')
plt.ylabel('Episodes')
plt.show()
# Experiment script: run the n-step Sarsa agent in the windy environment
# through RL-Glue and plot completed episodes against time steps.
from rl_glue import RLGlue
from windy_env import WindyEnvironment
from n_step_sarsa_agent import SarsaAgent
import numpy as np
import time
import matplotlib.pyplot as plt

if __name__ == "__main__":
    max_steps = 8000               # stop starting new episodes past this count
    max_steps_per_episode = 10000  # value passed to rl_episode for each episode

    # Create and pass agent and environment objects to RLGlue
    environment = WindyEnvironment()
    agent = SarsaAgent()
    rlglue = RLGlue(environment, agent)
    del agent, environment  # don't use these anymore
    rlglue.rl_init()

    steps_at_episode_start = []  # num_steps() sampled before each episode
    episodes_completed = []      # num_episodes() sampled after each episode

    # Values the agent returns for the 'n' and 'a' messages; they are only
    # used in the plot title (presumably the n-step parameter and the number
    # of actions -- confirm against the agent's message handler).
    n = rlglue.rl_agent_message('n')
    a = rlglue.rl_agent_message('a')

    while rlglue.num_steps() < max_steps:
        steps_at_episode_start.append(rlglue.num_steps())
        rlglue.rl_episode(max_steps_per_episode)
        episodes_completed.append(rlglue.num_episodes())

    plt.title(str(n) + '-step sarsa with ' + str(a) + " actions")
    plt.xlabel('Time steps')
    plt.ylabel('Episodes')
    plt.plot(steps_at_episode_start, episodes_completed)
    plt.show()
rlglue = RLGlue(environment, agent) rlglue.rl_agent_message('n = 0') del agent, environment # don't use these anymore steps1 = {} L1 = [] L2 = [0]*num_episodes for episode in range(num_episodes): steps1[episode]=[] L1.append(episode+1) for run in range(num_runs): np.random.seed(run) rlglue.rl_init() step = 0 for episode in range(num_episodes): rlglue.rl_episode(max_steps) new_step = rlglue.num_steps() steps1[episode].append(new_step-step) step = new_step for episode in range(num_episodes): L2[episode] = sum(steps1[episode])/num_runs plt.plot(L1[1:],L2[1:],label = '0 planning steps') environment =GridEnvironment() agent = DynaqAgent() rlglue = RLGlue(environment, agent) rlglue.rl_agent_message('n = 5') del agent, environment # don't use these anymore steps1 = {} L1 = [] L2 = [0]*num_episodes for episode in range(num_episodes):
# Experiment script: run a Sarsa agent through RL-Glue until the step budget
# is reached, recording the episode and step counts after every episode, and
# save both series to 'windy.npz'.
from sarsa_agent import SarsaAgent
import numpy as np

# NOTE(review): RLGlue and WindEnvironment are used below but are not imported
# in the visible part of this script -- confirm the imports exist.

if __name__ == "__main__":
    max_total_step = 8000

    # Both series start with a 0 entry ahead of the first episode.
    episode_counts = [0]
    step_counts = [0]

    # Build the environment, then the agent, and hand both to RL-Glue without
    # keeping separate references to them.
    rlglue = RLGlue(WindEnvironment(), SarsaAgent())

    # Fixed seed for reproducibility; set before RL-Glue is initialised.
    np.random.seed(1)
    rlglue.rl_init()

    # Keep running episodes while the step count is less than the budget.
    while rlglue.num_steps() < max_total_step:
        rlglue.rl_episode(max_total_step)
        episode_counts.append(rlglue.num_episodes())
        step_counts.append(rlglue.num_steps())

    print(episode_counts)
    np.savez('windy.npz', timeSteps=step_counts, Episodes=episode_counts)
    # To load the results again:
    #   data = np.load('windy.npz')
    #   print(data["timeSteps"])  # the recorded step counts