def plot(values_list, policy, i, p_win, name=None, legend=True):
    """Plot the per-sweep values and the policy on two stacked axes.

    The upper axes gets one thin line per entry of ``values_list``, labelled
    with its 1-based sweep number.  The lower axes shows ``policy``.  The two
    axes share their x-axis and are drawn under ``plotting.rc()`` settings.

    Args:
        values_list: iterable of value estimates, one per sweep; each entry
            is handed directly to ``Axes.plot``.
        policy: data for the lower axes, handed directly to ``Axes.plot``.
        i: exponent shown in both axes titles, rendered as ``theta = 10^{-i}``.
        p_win: value interpolated into the figure-level title.
        name: when not ``None``, the figure is written as ``<name>.eps`` into
            the ``ex_4_9`` folder below ``c.Paths.output``.
        legend: whether to draw the "Sweep" legend on the upper axes.

    Returns:
        The tuple ``(fig, (ax1, ax2))``: the figure and its upper and lower
        axes.
    """
    # Both in-axes titles are pinned to the same spot (top-left of the panel).
    title_kwargs = dict(x=0.05, y=0.95, ha='left', va='top', fontsize=10)

    with plt.rc_context(plotting.rc()):
        fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)

        # Upper panel: one line per sweep, numbered from 1 for the legend.
        sweep = 1
        for v in values_list:
            ax1.plot(v, label=sweep, lw=0.5)
            sweep += 1

        ax1.grid(alpha=0.1, ls=':')
        ax2.grid(alpha=0.1, ls=':')

        if legend:
            ax1.legend(title="Sweep", bbox_to_anchor=(1, 1))
        ax1.set_title(fr"Optimal Values: $\theta=10^{{{-i}}}$", **title_kwargs)

        # Lower panel: the policy itself.
        ax2.plot(policy)
        ax2.set_title(fr"Optimal Policy: $\theta=10^{{{-i}}}$", **title_kwargs)

        plt.suptitle(fr"$\mathbb{{P}}(\mathtt{{win}})={p_win}$")

        if name is not None:
            target = os.path.join(c.Paths.output, 'ex_4_9', name + '.eps')
            plt.savefig(target, format='eps', dpi=1000, bbox_inches='tight')

    return fig, (ax1, ax2)
# Run repeated weight updates on Baird's counterexample and plot how each
# weight component evolves over the steps.
# NOTE(review): `n_steps`, `q` and `feature` are not defined in this part of
# the file -- presumably they are set up earlier; confirm.
random_state = np.random.RandomState(seed=0)  # fixed seed => repeatable run
states = np.arange(7)
weights = np.array([1, 1, 1, 1, 1, 1, 10, 1])
alpha = 0.01
gamma = 0.99

# Record of the weight vector before the first update and after every update.
weights_list = [weights]
for _step in range(n_steps):
    # Sample the state to update uniformly from `states`.
    s = random_state.choice(states)
    # The bootstrapped target is always evaluated at the last state.
    td_error = gamma * q(states[-1], weights) - q(s, weights)
    # NOTE(review): the constant 7 presumably relates to the number of states
    # (e.g. an importance-sampling ratio) -- confirm before changing it.
    weights = weights + 7 * alpha * td_error * feature(s)
    weights_list.append(weights)

# One row per recorded weight vector (assuming `feature` returns a vector
# matching `weights` -- TODO confirm), so each column is one weight's history.
output = np.c_[weights_list]

with plt.rc_context(plotting.rc()):
    fig, ax = plt.subplots(1)
    lines = ax.plot(output)

    labels = [f"w{i+1}" for i in range(output.shape[1])]
    ax.legend(lines, labels)

    ax.grid(alpha=0.1)
    ax.set_xlabel("Steps")
    ax.set_ylabel("Weight")
    ax.set_title("Q-learning on Baird's Counterexample")
    plt.tight_layout()

    figure_path = os.path.join(
        c.Paths.output,
        "ex_11_3",
        "bairds_counter_example_q_learning.png",
    )
    plotting.savefig(fig, path=figure_path)
""" from itertools import product import os import matplotlib matplotlib.use('TkAgg') import matplotlib.pyplot as plt import numpy as np from plotting import rc from exercises.utils import read_pickle from exercises.ex_4_7 import output_folder if __name__ == "__main__": plt.rcParams.update(rc()) plt.rcParams.update({'figure.figsize': (15, 8)}) policy = read_pickle(os.path.join(output_folder, 'policy.pkl')) values = read_pickle(os.path.join(output_folder, 'values.pkl')) max_cars = values.shape[0] fig = plt.figure() ax = fig.add_subplot(121) lim = np.max(np.abs(policy)) ax.matshow(policy.T, cmap=plt.cm.bwr, vmin=-lim, vmax=lim) ax.set_xticks(range(max_cars)) ax.set_yticks(range(max_cars)) ax.xaxis.set_ticks_position('none') ax.yaxis.set_ticks_position('none') ax.set_xlabel("Cars at location x")