def _add_method(method, label, color=None):
    """Label ``method``, optionally set its color, and append it to ``methods``.

    ``methods`` is the collection defined by the surrounding script.  The
    ``color`` attribute is only assigned when a color is given.  Returns the
    same ``method`` object so the caller can keep a module-level name for it.
    """
    method.name = label
    if color is not None:
        method.color = color
    methods.append(method)
    return method


# TD(0) with a td.RMalpha(10., 0.5) step size; the label is built from the
# schedule's ``c`` and ``mu`` attributes.
alpha = td.RMalpha(10., 0.5)
lam = 0.
td0 = _add_method(
    td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi),
    r"TD({}) $\alpha={}t^{{-{} }}$".format(lam, alpha.c, alpha.mu))

# TD(0) with a td.DabneyAlpha() step size, labelled "aut.".
alpha = td.DabneyAlpha()
lam = 0.
td0 = _add_method(
    td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi),
    r"TD({}) $\alpha$=aut.".format(lam))

# TDC(1): the second step size is passed as beta = alpha * mu.
alpha = 0.2
mu = 0.0001
lam = 1.
tdc = _add_method(
    td.TDCLambda(lam=lam, alpha=alpha, beta=alpha * mu, phi=phi),
    r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu),
    color="r")

# Recursive LSTD with lam=.8 and eps=10000.
lam = .8
eps = 10000
lstd = _add_method(
    td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi),
    r"LSTD({})".format(lam))

# Recursive LSTD with lam=.0 and eps=100.
lam = .0
eps = 100
lstd = _add_method(
    td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi),
    r"LSTD({})".format(lam))
def _add_method(method, label, color=None):
    """Label ``method``, optionally set its color, and append it to ``methods``.

    ``methods`` is the collection defined by the surrounding script.  The
    ``color`` attribute is only assigned when a color is given.  Returns the
    same ``method`` object so the caller can keep a module-level name for it.
    """
    method.name = label
    if color is not None:
        method.color = color
    methods.append(method)
    return method


# First TD entry: alpha, lam and gamma are whatever the preceding code bound.
td0 = _add_method(
    td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma),
    r"TD({}) $\alpha$={}".format(lam, alpha),
    color="k")

# TD(1) with alpha=.004, drawn with the same color "k" as the entry above.
alpha = .004
lam = 1.
td0 = _add_method(
    td.LinearTDLambda(alpha=alpha, lam=lam, phi=phi, gamma=gamma),
    r"TD({}) $\alpha$={}".format(lam, alpha),
    color="k")

# TDC(1); here mu is handed to the constructor directly.
lam = 1.
alpha = 0.004
mu = 0.0001
tdc = _add_method(
    td.TDCLambda(alpha=alpha, mu=mu, lam=lam, phi=phi, gamma=gamma),
    r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu),
    color="b")

# Recursive LSPE(0).  It is bound to the name ``lstd``, which the next
# entry rebinds.
alpha = .5
lam = 0.0
lstd = _add_method(
    td.RecursiveLSPELambda(lam=lam, alpha=alpha, phi=phi, gamma=gamma),
    r"LSPE({}) $\alpha$={}".format(lam, alpha),
    color="g")

# Recursive LSTD(0).  Only the name is set here; nothing in this part of
# the script assigns a color or appends it to ``methods``.
lam = 0.0
eps = 100000
lstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi, gamma=gamma)
lstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps)
gamma, phi, theta0, policy=policy, normalize_phi=True, mu_next=1000, mu_iter=1000, mu_restarts=8) #states, _, _, _, _ = mdp.samples_cached(n_iter=1000, n_restarts=15, # policy=policy, seed=8000) lam = 0.0 alpha = 0.00002 mu = .0002 tdc = td.TDCLambda(alpha=alpha, mu=mu, lam=lam, phi=phi) tdc.name = r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu) lam = 0.0 alpha = td.RMalpha(.00006, 0.02) beta = td.RMalpha(.00001, 0.1) tdcrm = td.TDCLambda(alpha=alpha, beta=beta, lam=lam, phi=phi) tdcrm.name = r"TDC({}) $\alpha$={} $\mu$={}".format(lam, alpha, mu) lam = 0. eps = 10000 rlstd = td.RecursiveLSTDLambda(lam=lam, eps=eps, phi=phi) rlstd.name = r"LSTD({}) $\epsilon$={}".format(lam, eps) lam = 0. eps = 1000000