Example #1
import numpy as np
# RandomWalk and the n-step agents (Sarsa, ExpSARSA, TreeBackup, QSigma)
# come from the surrounding project.

def run_agent_RMS_value(num_runs, num_episodes, discount, step_size, step=1):
    """ Run the n-step Sarsa agent for num_runs runs of num_episodes episodes
        and return the per-episode RMS error on the state values, averaged over runs.
    """
    mdp = RandomWalk(19, -1)
    s = mdp.init()

    # ground truth for value
    gt_v = np.asarray(mdp.value_equiprobable(discount)[1:-1])
    # initial value
    init_v = np.asarray([0.5] * mdp.num_states())[1:-1]

    # Arrays for RMS error over all states
    rms_err = np.zeros(num_episodes + 1)
    sum_rms_err = np.zeros(num_episodes + 1)
    rms_err[0] = np.sqrt(np.mean(np.square(init_v - gt_v)))

    # create n-step SARSA agent
    agent = Sarsa(mdp, s, step)

    for run in range(num_runs):
        for i in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            rms_err[i + 1] = np.sqrt(np.mean(np.square(np.asarray(agent.Q_to_value()[1:-1]) - gt_v)))
        sum_rms_err += rms_err
        # Reset Q after a run
        agent.reset_Q()

    # averaged over num_runs
    return sum_rms_err / num_runs
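
A minimal usage sketch for run_agent_RMS_value, plotting one learning curve per choice of n. The run and episode counts and the list of n values are arbitrary illustrative choices, not part of the original code.

import matplotlib.pyplot as plt

def plot_learning_curves():
    """ Sketch: RMS error per episode for several n-step Sarsa agents. """
    num_runs, num_episodes = 10, 50
    for step in [1, 2, 4, 8]:
        # returned array has num_episodes + 1 entries (initial error included)
        err = run_agent_RMS_value(num_runs, num_episodes, 1.0, 0.1, step)
        plt.plot(range(num_episodes + 1), err, label="{}-step Sarsa".format(step))
    plt.xlabel("Episode")
    plt.ylabel("Average RMS error")
    plt.legend()
    plt.show()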
Example #2
def print_value():
    """ Print the RMS error on state values """
    mdp = RandomWalk(19, 1)
    # ground truth for value
    gt_v = np.asarray(mdp.value_equiprobable(1.0)[1:-1])
    # initial value
    init_v = np.asarray([0.5] * mdp.num_states())[1:-1]
    rms_err = np.sqrt(np.mean(np.square(init_v - gt_v)))
    print("RMS error is ", rms_err)
Example #3
def run_agent_RMS_param(num_runs, num_episodes, discount, step_size, step=1, agent_type="Sarsa"):
    """ Run the n-step Sarsa agent and return the avg RMS over all states, episodes and runs """
    mdp = RandomWalk(19, -1)
    s = mdp.init()

    # ground truth for value
    gt_v = np.asarray(mdp.value_equiprobable(discount)[1:-1])
    # Arrays for RMS error over all states
    rms_err = np.zeros(num_episodes)
    sum_rms_err = 0.0

    # create n-step agent
    print("Starting agent {}-step {}".format(step, agent_type))
    if agent_type.lower() == "sarsa":
        agent = Sarsa(mdp, s, step)
    elif agent_type.lower() == "expsarsa":
        agent = ExpSARSA(mdp, s, step)
    elif agent_type.lower() == "treebackup":
        agent = TreeBackup(mdp, s, step)
    elif agent_type.lower() == "qsigma":
        agent = QSigma(mdp, 0.5, s, step)
    else:
        raise ValueError("Unknown agent type: {}".format(agent_type))

    for run in range(num_runs):
        for i in range(num_episodes):
            agent.episode(discount, step_size, 10000)
            agent.init()
            rms_err[i] = np.sqrt(np.mean(np.square(np.asarray(agent.Q_to_value()[1:-1]) - gt_v)))
        sum_rms_err += np.sum(rms_err)
        # Reset Q after a run
        agent.reset_Q()

    # averaged over num_runs and num_episodes
    return sum_rms_err / (num_runs * num_episodes)
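
A possible way to use run_agent_RMS_param is to sweep n and the step size for a given agent type and compare the averaged errors; the grids below are arbitrary illustrative choices.

def sweep_parameters(agent_type="Sarsa", num_runs=10, num_episodes=10):
    """ Sketch: average RMS error for each (n, step_size) combination. """
    results = {}
    for step in [1, 2, 4, 8]:
        for step_size in [0.1, 0.2, 0.4, 0.8]:
            results[(step, step_size)] = run_agent_RMS_param(
                num_runs, num_episodes, 1.0, step_size, step, agent_type)
    return results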
Example #4
import matplotlib.pyplot as plt

def plot_value():
    """ Plot the state values of the random walk found by n-step Sarsa
        and compare the effect of the number of steps n.
    """
    num_episodes = 200
    discount = 1
    step_size = 0.1

    value_list = []
    for step in [2 ** x for x in range(5)]:
        value_list.append(run_agent_value(num_episodes, discount, step_size, step))

    mdp = RandomWalk(19)
    # ground truth for V
    gt_v = mdp.value_equiprobable(discount)
    # plot the value
    plt.plot(range(1, 20), gt_v[1:-1], 'ro-', label="True Value")
    colors = ["y", "b", "g", "m", "c"]
    for i, value in enumerate(value_list):
        plt.plot(range(1, 20), value[1:-1], 'o-', color=colors[i], label="{}-step Sarsa".format(2 ** i))
    plt.legend(loc="upper left")
    plt.xlabel("State")
    plt.ylabel("Value")
    plt.title("Value estimation of n-step Sarsa after {} episodes".format(num_episodes))
    plt.show()
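
plot_value calls run_agent_value, which is not part of this listing. A hypothetical sketch of what it might look like, built from the same API calls as run_agent_RMS_value above; the body below is an assumption, not the original implementation.

def run_agent_value(num_episodes, discount, step_size, step=1):
    """ Hypothetical: train one n-step Sarsa agent and return its value estimates. """
    mdp = RandomWalk(19, -1)  # reward argument assumed as in the earlier examples
    agent = Sarsa(mdp, mdp.init(), step)
    for _ in range(num_episodes):
        agent.episode(discount, step_size, 10000)
        agent.init()
    # includes terminal states, matching the [1:-1] slicing used above
    return agent.Q_to_value()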