def make_rmalpha():
    """Return one ``td.RMalpha`` instance per (c, t) combination of the sweep grid.

    The first constructor argument ranges over the values produced by
    ``np.arange(0.01, 0.1, 0.01)`` followed by a fixed list from 0.1 up to 30;
    the second ranges over eight fixed values from 0.01 to 1.5.  The result is
    ordered with the first argument varying slowest.
    """
    # NOTE(review): np.arange with a float step has a rounding-sensitive
    # endpoint; kept as-is so the generated values stay exactly the same.
    c_values = list(np.arange(0.01, 0.1, 0.01))
    c_values.extend(
        [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 5, 10, 30]
    )
    t_values = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 1, 1.5]
    return [
        td.RMalpha(c_val, t_val)
        for c_val in c_values
        for t_val in t_values
    ]
# Collect the configured estimators in `methods`.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.2, 2
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# GTD2, same parametrisation.
alpha = 0.4
mu = 0.5
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(0.03, 0.1), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a constant step size.
alpha, lam = 0.004, 1.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
lam, alpha = 1.0, 0.004
# Collect the configured estimators in `methods`.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.009, 0.1
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# GTD2, same parametrisation.
alpha = 0.02
mu = 0.1
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(0.04, 0.25), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a constant step size.
alpha, lam = 0.004, 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
lam, alpha = 0.0, 0.004
# Register the `gtd` instance configured just above this section.
methods.append(gtd)

# GTD2 with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.5, 1.0
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "#6E086D"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a constant step size.
alpha, lam = 0.2, 1.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi)
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a td.RMalpha step-size object; the label is built from its
# `c` and `mu` attributes.
alpha, lam = td.RMalpha(10.0, 0.5), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi)
td0.name = r"TD({0}) $\alpha={1}t^{{-{2} }}$".format(lam, alpha.c, alpha.mu)
methods.append(td0)

# TD(lam) with a td.DabneyAlpha step-size object.
alpha, lam = td.DabneyAlpha(), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi)
td0.name = r"TD({0}) $\alpha$=aut.".format(lam)
methods.append(td0)

# TDC(lam); beta is alpha scaled by mu.  Naming/registration presumably
# happens in the code following this section.
alpha, mu, lam = 0.2, 0.0001, 1.0
tdc = td.TDCLambda(alpha=alpha, beta=mu * alpha, lam=lam, phi=phi)
# Collect the configured estimators in `methods`.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.001, 0.0001
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# GTD2, same parametrisation.
alpha = 0.001
mu = 1.0
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(0.03, 0.25), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a constant step size.
alpha, lam = 0.002, 0.2
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
lam, alpha = 0.2, 0.002
# Collect the configured estimators in `methods`.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.2, 0.01
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# GTD2, same parametrisation.
alpha = 0.3
mu = 2.0
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(5.0, 0.25), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a constant step size.
alpha, lam = 0.3, 0.2
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
lam, alpha = 0.0, 0.3
normalize_phi=True, mu_next=1000, mu_iter=1000, mu_restarts=8) #states, _, _, _, _ = mdp.samples_cached(n_iter=1000, n_restarts=15, # policy=policy, seed=8000) lam = 0.0 alpha = 0.00002 mu = .0002 tdc = td.TDCLambda(alpha=alpha, mu=mu, lam=lam, phi=phi) tdc.name = r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu) lam = 0.0 alpha = td.RMalpha(.00006, 0.02) beta = td.RMalpha(.00001, 0.1) tdcrm = td.TDCLambda(alpha=alpha, beta=beta, lam=lam, phi=phi) tdcrm.name = r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu) lam = 0. eps = 10000 rlstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi) rlstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps) lam = 0. eps = 1000000 lstd = td.LSTDLambda(lam=lam, eps=eps, phi=phi) lstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps) l = 1000
# Collect the configured estimators in `methods`.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.007, 0.0001
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# GTD2, same parametrisation (mu stays an int so the label reads "1").
alpha = 0.002
mu = 1
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(0.01, 0.1), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a constant step size.
alpha, lam = 0.004, 0.4
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
lam, alpha = 0.0, 0.002
# Collect the configured estimators in `methods`.  Only the last TD instance
# is registered; the appends for the first three are commented out.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.001, 0.01
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
#methods.append(gtd)

# GTD2, same parametrisation.
alpha = 0.006
mu = 0.5
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
#methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(0.6, 0.7), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
#methods.append(td0)

# TD(lam) with a constant step size.
alpha, lam = 0.006, 0.4
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
lam, alpha = 0.2, 0.006
# Collect the configured estimators in `methods`.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.0005, 2.0
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# GTD2, same parametrisation.
alpha = 0.0005
mu = 1.0
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(0.06, 0.5), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a constant step size.
alpha, lam = 0.0005, 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
lam, alpha = 0.0, 0.0005
# Collect the configured estimators in `methods`.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.007, 0.0001
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# GTD2, same parametrisation (mu stays an int so the label reads "4").
alpha = 0.003
mu = 4
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(0.09, 0.25), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a td.DabneyAlpha step-size object; the label only needs lam.
alpha, lam = td.DabneyAlpha(), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$=auto".format(lam)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
alpha, lam = 0.001, 0.4
# Collect the configured estimators in `methods`.
methods = []

# GTD with constant step size alpha; beta is alpha scaled by mu.
alpha, mu = 0.05, 0.01
gtd = td.GTD(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "r"
gtd.name = r"GTD $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# GTD2, same parametrisation.
alpha = 0.07
mu = 2.0
gtd = td.GTD2(alpha=alpha, beta=alpha * mu, phi=phi)
gtd.color = "orange"
gtd.name = r"GTD2 $\alpha$={0} $\mu$={1}".format(alpha, mu)
methods.append(gtd)

# TD(lam) with a td.RMalpha step-size object.
alpha, lam = td.RMalpha(0.4, 0.1), 0.0
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# TD(lam) with a constant step size.
alpha, lam = 0.1, 0.2
td0 = td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma)
td0.color = "k"
td0.name = r"TD({0}) $\alpha$={1}".format(lam, alpha)
methods.append(td0)

# Parameters left bound for whatever follows this section (not visible here).
lam, alpha = 0.0, 0.15
# Behaviour policy built from a random (n, n_a) array; the target policy is
# the very same object.
beh_pol = policies.Discrete(np.random.rand(n, n_a))
tar_pol = beh_pol

# Prediction task whose fourth argument is a zero vector of length phi.dim.
task = LinearDiscreteValuePredictionTask(mdp, gamma, phi,
                                         np.zeros(phi.dim), policy=beh_pol)

# TDC(lam) with constant step size alpha and secondary-rate factor mu.
lam = 0.0
alpha = 0.007
mu = .01
tdc = td.TDCLambda(alpha=alpha, mu=mu, lam=lam, phi=phi)
tdc.name = r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu)

# TDC(lam) with td.RMalpha objects for both alpha and beta.
lam = 0.0
alpha = td.RMalpha(.05, 0.2)
beta = td.RMalpha(.5, 0.25)
tdcrm = td.TDCLambda(alpha=alpha, beta=beta, lam=lam, phi=phi)
# The label reports the beta actually passed above; previously it printed the
# `mu` left over from the constant-step TDC, which this instance does not use.
tdcrm.name = r"TDC({}) $\alpha$={} $\beta$={}".format(lam, alpha, beta)

# Recursive LSTD(lam) with eps as given.
lam = 0.
eps = 10000
rlstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi)
rlstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps)

# LSTD(lam) with eps as given.
lam = 0.
eps = 1000000
lstd = td.LSTDLambda(lam=lam, eps=eps, phi=phi)
lstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps)

# Left bound for whatever follows this section (not visible here).
l = 8000