Example #1
import numpy as np
import policies
from task import LinearDiscreteValuePredictionTask

# n, mdp, gamma and run_experiment are defined by the surrounding experiment script.
def lambda_errors(phi, lambdas, noises):
    mserrors = np.zeros((len(lambdas), len(noises)))
    variances = np.zeros(len(noises))
    # a: uniform action-probability table, b: deterministic cyclic shift
    a = np.ones((n, n)) / n
    b = np.zeros((n, n))
    for i in range(n - 1):
        b[i, i + 1] = 1.
    b[-1, 0] = 1.
    for i, noise in enumerate(noises):
        # behavior policy: noise-weighted mixture of the uniform and cyclic tables
        c = noise * a + (1 - noise) * b
        c /= c.sum(axis=1)[:, None]
        beh_pol = policies.Discrete(c)
        task = LinearDiscreteValuePredictionTask(mdp,
                                                 gamma,
                                                 phi,
                                                 np.zeros(phi.dim),
                                                 policy=beh_pol)
        # pass the experiment configuration (module globals plus the current
        # feature map and task) to the parallel experiment runner
        d = globals().copy()
        d["phi"] = phi
        d["task"] = task
        mean, std, raw = run_experiment(n_jobs=-1, **d)
        val = mean[:, -1, n:]
        # cap values above the first method's initial error to damp diverging runs
        val[mean[:, -1, n:] > mean[0, -1, 0]] = mean[0, -1, 0]
        val = val.mean(axis=1)
        mserrors[:, i] = val - np.mean(val)
        print(noise, lambdas[np.argmin(val)])
    #mserrors -= mserrors.min(axis=1)[:,None]
    #mserrors /= mserrors.max(axis=1)[:,None]
    return mserrors
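# Hypothetical usage sketch (not part of the original example): it assumes the
# surrounding experiment script defines phi, n, mdp, gamma and run_experiment
# as used inside lambda_errors; the grid values below are illustrative only.
lambdas = np.linspace(0., 1., 11)
noises = np.array([0., 0.1, 0.3, 0.5])
mserrors = lambda_errors(phi, lambdas, noises)
print(lambdas[np.argmin(mserrors, axis=0)])  # best lambda for each noise level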
Example #2
import td
import examples
from task import LinearDiscreteValuePredictionTask
import numpy as np
import matplotlib.pyplot as plt
import features
import policies

n = 20
n_random = 800
mdp = examples.CorruptedChain(n_states=n)
phi = features.corrupted_rbfs(n_S=n, n_rbfs=5, n_random=n_random)
gamma = .9
n_feat = phi.dim
p0 = np.zeros(n_feat)
pol = np.zeros((n, 2))
pol[:10, 0] = 1
pol[10:, 1] = 1
policy = policies.Discrete(prop_table=pol)
task = LinearDiscreteValuePredictionTask(mdp, gamma, phi, p0, policy=policy)

# define the methods to examine
methods = []  # [td0, gtd, gtd2]

lstd = td.RecursiveLSTDLambdaJP(lam=0, eps=1000, phi=phi)
lstd.name = r"LSTD({}) $\ell_2 \tau={}$".format(0, 0)
lstd.color = "b"
methods.append(lstd)
#for eps in np.power(10,np.arange(-1,4)):
lstd = td.LSTDLambdaJP(lam=0, tau=0.8, phi=phi)
lstd.name = r"LSTD({}) $\ell_2 \tau={}$".format(0, .8)
lstd.color = "b"
#methods.append(lstd)
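# Sketch (not in the original example): a further LSTD(lambda) instance with a
# nonzero lambda, reusing the constructor signature shown above; the parameter
# values are illustrative.
lstd = td.RecursiveLSTDLambdaJP(lam=0.4, eps=1000, phi=phi)
lstd.name = r"LSTD({}) $\ell_2 \tau={}$".format(0.4, 0)
lstd.color = "g"
methods.append(lstd)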
Example #3
import td
import examples
from task import LinearDiscreteValuePredictionTask
import numpy as np
import features
import policies
import regtd
n = 400
n_a = 10
n_feat = 200
mdp = examples.RandomMDP(n, n_a)
phi = features.lin_random(n_feat, n, constant=True)
gamma = .95
np.random.seed(3)
beh_pol = policies.Discrete(np.random.rand(n, n_a))
tar_pol = policies.Discrete(np.random.rand(n, n_a))
task = LinearDiscreteValuePredictionTask(mdp,
                                         gamma,
                                         phi,
                                         np.zeros(phi.dim),
                                         policy=beh_pol,
                                         target_policy=tar_pol)

methods = []
alpha = 0.007
mu = .0001
gtd = td.GTD(alpha=alpha, beta=mu * alpha, phi=phi)
gtd.name = r"GTD $\alpha$={} $\mu$={}".format(alpha, mu)
gtd.color = "r"
methods.append(gtd)
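# Sketch (not in the original example): sweep a second step size with the same
# GTD constructor used above; the alpha value is illustrative.
alpha = 0.003
gtd = td.GTD(alpha=alpha, beta=mu * alpha, phi=phi)
gtd.name = r"GTD $\alpha$={} $\mu$={}".format(alpha, mu)
gtd.color = "g"
methods.append(gtd)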
Example #4
"""
Experiment that shows arbitrary off-policy behavior of TD
"""
__author__ = "Christoph Dann <*****@*****.**>"
import td
import examples
import numpy as np
import features
import matplotlib.pyplot as plt
from task import LinearDiscreteValuePredictionTask
import policies
n = 7
beh_pi = np.ones((n + 1, 2))
beh_pi[:, 0] = float(n) / (n + 1)
beh_pi[:, 1] = float(1) / (n + 1)
beh_pol = policies.Discrete(prop_table=beh_pi)
target_pi = np.zeros((n + 1, 2))
target_pi[:, 0] = 0
target_pi[:, 1] = 1
target_pol = policies.Discrete(prop_table=target_pi)

mdp = examples.BairdStarExample(n)
phi = features.linear_blended(n + 1)

methods = []

gamma = 0.99
task = LinearDiscreteValuePredictionTask(mdp,
                                         gamma,
                                         phi,
                                         np.asarray(n * [1.] + [10., 1.]),
                                         policy=beh_pol,
                                         target_policy=target_pol)