        # NOTE(review): the enclosing `def lambda_errors(phi, lambdas, noises)`
        # and its loop over noise levels are outside this view; indentation is
        # reconstructed. Assumes `i`/`noise`/`val` come from an enumerate over
        # `noises` with `val` holding per-lambda errors — TODO confirm.
        # Center this noise level's error curve around its mean.
        mserrors[:, i] = val - np.mean(val)
        # Report the lambda achieving the lowest error for this noise level.
        # NOTE(review): Python 2 `print` statement — this file predates Python 3.
        print noise, lambdas[np.argmin(val)]
    # Alternative per-row normalization, deliberately disabled:
    #mserrors -= mserrors.min(axis=1)[:,None]
    #mserrors /= mserrors.max(axis=1)[:,None]
    return mserrors


if __name__ == "__main__":
    # Interactive plotting so figure windows do not block the script.
    plt.ion()
    # Cache file for the (expensive) lambda/noise error sweeps.
    fn = "data/noise_lambda.npz"
    if os.path.exists(fn):
        # Cached results exist: inject mserrors1/mserrors2/noises/lambdas
        # straight into module globals instead of recomputing.
        d = np.load(fn)
        globals().update(d)
    else:
        # Sweep once with a low-dimensional random linear encoding ...
        # NOTE(review): `lambdas` and `noises` must be module globals defined
        # outside this view — verify against the full file.
        n_feat = 20
        phi = features.lin_random(n_feat, n, constant=True)
        mserrors2 = lambda_errors(phi, lambdas, noises)
        # ... and once with a tabular (identity) feature encoding.
        phi = features.eye(n)
        mserrors1 = lambda_errors(phi, lambdas, noises)
        np.savez(fn, mserrors1=mserrors1, mserrors2=mserrors2,
                 noises=noises, lambdas=lambdas)
    # Shift/scale errors column-wise for plotting; mserrors1 is only shifted
    # (its rescaling line is deliberately commented out).
    mserrors2 -= mserrors2.min(axis=0)
    mserrors2 /= mserrors2.max(axis=0)
    mserrors1 -= mserrors1.min(axis=0)
    #mserrors1 /= mserrors1.max(axis=0)
    plt.figure()
    # Custom colormap for the heatmap; this call continues past this view.
    mymap = mpl.colors.LinearSegmentedColormap.from_list(
Uniformly sampled random MDP with discrete states, on-policy case
"""
# NOTE(review): the opening triple-quote of the module docstring lies outside
# the visible chunk of this file.
__author__ = "Christoph Dann <*****@*****.**>"

import td
import examples
from task import LinearDiscreteValuePredictionTask
import numpy as np
import features
import policies
import regtd

# Problem size: 400 states, 10 actions, 200 random linear features
# (plus a constant feature added by `lin_random`).
n = 400
n_a = 10
n_feat = 200
mdp = examples.RandomMDP(n, n_a)
phi = features.lin_random(n_feat, n, constant=True)
gamma = .95  # discount factor
# Fixed seed so the randomly drawn behavior policy is reproducible.
np.random.seed(3)
# On-policy setting: target policy equals the (random) behavior policy.
beh_pol = policies.Discrete(np.random.rand(n, n_a))
tar_pol = beh_pol
# Value-prediction task with zero-initialized weight vector.
task = LinearDiscreteValuePredictionTask(mdp, gamma, phi, np.zeros(phi.dim),
                                         policy=beh_pol)

# Algorithms under comparison; each entry carries a display name and a
# plot color used by the plotting code.
methods = []
alpha = 0.007   # GTD step size
mu = .0001      # second-step-size ratio: beta = mu * alpha
gtd = td.GTD(alpha=alpha, beta=mu * alpha, phi=phi)
gtd.name = r"GTD $\alpha$={} $\mu$={}".format(alpha, mu)
gtd.color = "r"
methods.append(gtd)