def run_agent_RMS_value(num_runs, num_episodes, discount, step_size, step=1):
    """Run an n-step SARSA agent and track the RMS error of its state values.

    Returns an array of length ``num_episodes + 1`` — index 0 holds the error
    of the initial estimate (all states at 0.5) — averaged over ``num_runs``
    independent runs.
    """
    mdp = RandomWalk(19, -1)
    start_state = mdp.init()
    # Ground-truth state values under the equiprobable policy (terminals stripped).
    true_values = np.asarray(mdp.value_equiprobable(discount)[1:-1])
    # Initial estimate: every non-terminal state starts at 0.5.
    initial_values = np.asarray([0.5] * mdp.num_states())[1:-1]
    episode_rms = np.asarray([0.0] * (num_episodes + 1))
    accumulated_rms = np.asarray([0.0] * (num_episodes + 1))
    episode_rms[0] = np.sqrt(np.mean(np.square(initial_values - true_values)))
    # n-step SARSA agent.
    agent = Sarsa(mdp, start_state, step)
    for _ in range(num_runs):
        for episode in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            estimate = np.asarray(agent.Q_to_value()[1:-1])
            episode_rms[episode + 1] = np.sqrt(np.mean(np.square(estimate - true_values)))
        accumulated_rms += episode_rms
        # Start the next run from a fresh Q table.
        agent.reset_Q()
    # Averaged over num_runs.
    return accumulated_rms / num_runs
def print_value():
    """Print the RMS error between the initial value estimate and ground truth."""
    mdp = RandomWalk(19, 1)
    # True state values under the equiprobable policy (terminal states excluded).
    true_values = np.asarray(mdp.value_equiprobable(1.0)[1:-1])
    # Every non-terminal state starts with a value of 0.5.
    initial_values = np.asarray([0.5] * mdp.num_states())[1:-1]
    error = np.sqrt(np.mean(np.square(initial_values - true_values)))
    print("RMS error is ", error)
def run_agent_RMS_param(num_runs, num_episodes, discount, step_size, step=1, agent_type="Sarsa"):
    """Run an n-step TD-control agent and return the average RMS error on
    state values, averaged over all states, episodes and runs.

    Parameters
    ----------
    num_runs : int
        Number of independent runs to average over.
    num_episodes : int
        Episodes per run.
    discount : float
        Discount factor gamma.
    step_size : float
        Learning-rate alpha passed to each episode.
    step : int, optional
        The n in n-step (default 1).
    agent_type : str, optional
        One of "Sarsa", "ExpSARSA", "TreeBackup", "QSigma" (case-insensitive).

    Raises
    ------
    ValueError
        If ``agent_type`` is not one of the recognized agents.
    """
    mdp = RandomWalk(19, -1)
    s = mdp.init()
    # ground truth for value (terminal states excluded)
    gt_v = np.asarray(mdp.value_equiprobable(discount)[1:-1])
    # Per-episode RMS errors for the current run; np.zeros replaces the
    # hand-rolled list-of-floats construction.  (The unused initial-value
    # array and dead commented-out code were removed.)
    rms_err = np.zeros(num_episodes)
    sum_rms_err = 0.0
    # create n-step agent
    print("Starting agent {}-step {}".format(step, agent_type))
    kind = agent_type.lower()  # normalize once instead of per-branch
    if kind == "sarsa":
        agent = Sarsa(mdp, s, step)
    elif kind == "expsarsa":
        agent = ExpSARSA(mdp, s, step)
    elif kind == "treebackup":
        agent = TreeBackup(mdp, s, step)
    elif kind == "qsigma":
        # QSigma additionally takes a fixed sigma parameter (0.5 here).
        agent = QSigma(mdp, 0.5, s, step)
    else:
        # ValueError is a subclass of Exception, so callers catching the
        # old broad Exception still work.
        raise ValueError("Wrong type of agent")
    for run in range(num_runs):
        for i in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            rms_err[i] = np.sqrt(np.mean(np.square(np.asarray(agent.Q_to_value()[1:-1]) - gt_v)))
        sum_rms_err += np.sum(rms_err)
        # Reset Q after a run
        agent.reset_Q()
    # averaged over num_runs and num_episodes
    return sum_rms_err / (num_runs * num_episodes)
def plot_value():
    """Plot state values of the 19-state random walk estimated by n-step
    Sarsa, comparing the effect of the number of steps n (1, 2, 4, 8, 16).
    """
    num_episodes = 200
    discount = 1
    step_size = 0.1
    steps = [2 ** x for x in range(5)]
    estimates = [run_agent_value(num_episodes, discount, step_size, n) for n in steps]
    mdp = RandomWalk(19)
    # Ground-truth values for comparison.
    gt_v = mdp.value_equiprobable(discount)
    states = range(1, 20)
    plt.plot(states, gt_v[1:-1], 'ro-', label="True Value")
    palette = ["y", "b", "g", "m", "c"]
    for idx, estimate in enumerate(estimates):
        plt.plot(states, estimate[1:-1], 'o-', color=palette[idx],
                 label="{}-step SARSA".format(2 ** idx))
    plt.legend(loc="upper left")
    plt.xlabel("State")
    plt.title("Value estimation of n-step Sarsa after {} episodes".format(num_episodes))
    plt.show()