Code Example #1
        mserrors[:, i] = val - np.mean(val)
        print(noise, lambdas[np.argmin(val)])
    #mserrors -= mserrors.min(axis=1)[:,None]
    #mserrors /= mserrors.max(axis=1)[:,None]
    return mserrors


if __name__ == "__main__":
    plt.ion()
    fn = "data/noise_lambda.npz"
    if os.path.exists(fn):
        # reuse cached results if they are already on disk
        d = np.load(fn)
        globals().update(d)
    else:
        n_feat = 20
        # lambda errors for a random linear feature representation ...
        phi = features.lin_random(n_feat, n, constant=True)
        mserrors2 = lambda_errors(phi, lambdas, noises)
        # ... and for tabular (one-hot) features
        phi = features.eye(n)
        mserrors1 = lambda_errors(phi, lambdas, noises)
        np.savez(fn,
                 mserrors1=mserrors1,
                 mserrors2=mserrors2,
                 noises=noises,
                 lambdas=lambdas)

    # shift (and for mserrors2 also scale) each noise column for plotting
    mserrors2 -= mserrors2.min(axis=0)
    mserrors2 /= mserrors2.max(axis=0)
    mserrors1 -= mserrors1.min(axis=0)
    #mserrors1 /= mserrors1.max(axis=0)
    plt.figure()
    mymap = mpl.colors.LinearSegmentedColormap.from_list(
Code Example #2
File: disc_random_on.py  Project: amoliu/tdlearn
"""
Uniformly sampled random MDP with discrete states, on-policy case
"""
__author__ = "Christoph Dann <*****@*****.**>"

import td
import examples
from task import LinearDiscreteValuePredictionTask
import numpy as np
import features
import policies
import regtd
n = 400          # number of states
n_a = 10         # number of actions
n_feat = 200     # number of random linear features
mdp = examples.RandomMDP(n, n_a)
phi = features.lin_random(n_feat, n, constant=True)
gamma = .95      # discount factor
np.random.seed(3)
# random stochastic behavior policy; on-policy, so the target policy is the same
beh_pol = policies.Discrete(np.random.rand(n, n_a))
tar_pol = beh_pol
task = LinearDiscreteValuePredictionTask(mdp, gamma, phi, np.zeros(phi.dim),
                                         policy=beh_pol)


methods = []
# GTD (gradient TD) with primary step size alpha and secondary step size mu * alpha
alpha = 0.007
mu = .0001
gtd = td.GTD(alpha=alpha, beta=mu * alpha, phi=phi)
gtd.name = r"GTD $\alpha$={} $\mu$={}".format(alpha, mu)
gtd.color = "r"
methods.append(gtd)
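
The excerpt stops after registering the first learner. As a rough illustration of how such a script is typically finished, the sketch below runs the configured methods on the task and plots their learning curves. It is a minimal sketch only: avg_error_traces and its keyword arguments (n_indep, n_samples, error_every, criterion) are assumptions about the tdlearn task API inferred from similar experiment scripts, not confirmed by this excerpt.

# Hedged sketch: evaluate the configured methods and plot their error traces.
# NOTE: avg_error_traces and its arguments are assumed, not confirmed here;
# check the repository's experiment scripts for the exact signature.
import matplotlib.pyplot as plt

n_indep = 10        # independent runs to average over
n_samples = 10000   # transitions per run
error_every = 500   # evaluate the value-function error every 500 transitions

mean, std, raw = task.avg_error_traces(
    methods, n_indep=n_indep, n_samples=n_samples,
    error_every=error_every, criterion="RMSPBE")

for curve, m in zip(mean, methods):
    steps = np.arange(len(curve)) * error_every
    plt.plot(steps, curve, color=m.color, label=m.name)
plt.xlabel("Transitions")
plt.ylabel("RMSPBE")
plt.legend()
plt.show()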