def plot(values_list, policy, i, p_win, name=None, legend=True):
    """Draw per-sweep value curves above the policy curve on a shared x-axis.

    Args:
        values_list: Iterable of value sequences; each one is drawn as a thin
            line on the upper axes and labelled with its 1-based position.
        policy: Sequence drawn as a single line on the lower axes.
        i: Exponent shown (negated) in both axes titles as ``theta=10^{-i}``.
        p_win: Value interpolated into the figure's super-title.
        name: When not ``None``, the figure is written as
            ``<c.Paths.output>/ex_4_9/<name>.eps``.
        legend: Whether to attach the "Sweep" legend to the upper axes.

    Returns:
        ``(fig, (ax1, ax2))`` -- the figure and a tuple holding the upper
        (values) and lower (policy) axes.
    """
    # Both titles sit inside their axes at the top-left corner.
    title_kwargs = {
        'x': 0.05,
        'y': 0.95,
        'ha': 'left',
        'va': 'top',
        'fontsize': 10,
    }

    with plt.rc_context(plotting.rc()):
        fig, (values_ax, policy_ax) = plt.subplots(2, 1, sharex=True)

        # Upper panel: one curve per sweep, numbered from 1.
        sweep_no = 1
        for sweep_values in values_list:
            values_ax.plot(sweep_values, label=sweep_no, lw=0.5)
            sweep_no += 1

        values_ax.grid(alpha=0.1, ls=':')
        policy_ax.grid(alpha=0.1, ls=':')

        if legend:
            values_ax.legend(title="Sweep", bbox_to_anchor=(1, 1))
        values_ax.set_title(fr"Optimal Values: $\theta=10^{{{-i}}}$",
                            **title_kwargs)

        # Lower panel: the policy itself.
        policy_ax.plot(policy)
        policy_ax.set_title(fr"Optimal Policy: $\theta=10^{{{-i}}}$",
                            **title_kwargs)

        plt.suptitle(fr"$\mathbb{{P}}(\mathtt{{win}})={p_win}$")

        if name is not None:
            eps_path = os.path.join(c.Paths.output, 'ex_4_9', name + '.eps')
            plt.savefig(eps_path,
                        format='eps',
                        dpi=1000,
                        bbox_inches='tight')

    return fig, (values_ax, policy_ax)
Ejemplo n.º 2
0
    random_state = np.random.RandomState(seed=0)

    states = np.arange(7)
    weights = np.array([1, 1, 1, 1, 1, 1, 10, 1])
    alpha = 0.01
    gamma = 0.99

    weights_list = [weights]
    for i in range(n_steps):
        s = random_state.choice(states)
        weights = weights + 7 * alpha * (gamma * q(states[-1], weights) -
                                         q(s, weights)) * feature(s)
        weights_list.append(weights)

    output = np.c_[weights_list]

    with plt.rc_context(plotting.rc()):
        fig, ax = plt.subplots(1)
        lines = ax.plot(output)
        ax.legend(lines, [f"w{i+1}" for i in range(output.shape[1])])
        ax.grid(alpha=0.1)
        ax.set_xlabel("Steps")
        ax.set_ylabel("Weight")
        ax.set_title("Q-learning on Baird's Counterexample")
        plt.tight_layout()

        plotting.savefig(fig,
                         path=os.path.join(
                             c.Paths.output, "ex_11_3",
                             "bairds_counter_example_q_learning.png"))
Ejemplo n.º 3
0
"""
from itertools import product
import os

import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np

from plotting import rc
from exercises.utils import read_pickle
from exercises.ex_4_7 import output_folder

# Script entry point: load the pickled policy and value arrays from
# `output_folder` and start drawing them on a wide figure.
if __name__ == "__main__":
    # Apply the project's rc settings, then override the figure size.
    plt.rcParams.update(rc())
    plt.rcParams.update({'figure.figsize': (15, 8)})
    policy = read_pickle(os.path.join(output_folder, 'policy.pkl'))
    values = read_pickle(os.path.join(output_folder, 'values.pkl'))

    # The length of the first axis of `values` is used as the tick count on
    # both axes below -- presumably `values` is square; TODO confirm.
    max_cars = values.shape[0]

    fig = plt.figure()
    # Left half of a 1x2 grid.
    ax = fig.add_subplot(121)
    # Symmetric colour limits so that zero maps to the centre of the
    # diverging `bwr` colormap.
    lim = np.max(np.abs(policy))
    # The policy is transposed before display.
    ax.matshow(policy.T, cmap=plt.cm.bwr, vmin=-lim, vmax=lim)
    ax.set_xticks(range(max_cars))
    ax.set_yticks(range(max_cars))
    # Keep the tick labels but hide the tick marks themselves.
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    ax.set_xlabel("Cars at location x")