Esempio n. 1
0
class KolterCounterexample(FiniteDynamics):
    """Finite dynamics with two states and two actions.

    Each table below is passed through ``immutable`` and stored as a class
    attribute.
    """

    num_states = 2
    num_actions = 2

    # Tables produced by builder helpers defined outside this class.
    K = immutable(_buildTransition())
    Rs = immutable(_buildReward())

    # All-zero 2 x 2 x 2 table, sized here from the counts declared above.
    # NOTE(review): presumably a termination kernel that never fires --
    # confirm against FiniteDynamics.
    T = immutable(np.zeros((num_states, num_actions, num_states)))

    # Unit weight on index 0, zero on index 1.
    # NOTE(review): presumably the start-state distribution -- confirm.
    d0 = immutable(np.array([1.0, 0.0]))
Esempio n. 2
0
class BECounterexample(FiniteDynamics):
    """Finite dynamics with three states and two actions.

    Each table below is passed through ``immutable`` and stored as a class
    attribute.
    """

    num_states = 3
    num_actions = 2

    # Tables produced by builder helpers defined outside this class.
    K = immutable(_buildTransitionKernel())
    Rs = immutable(_buildRewardKernel())

    # All-zero table of shape (num_states, num_actions, num_states) = (3, 2, 3).
    # NOTE(review): presumably a termination kernel that never fires --
    # confirm against FiniteDynamics.
    T = immutable(np.zeros((num_states, num_actions, num_states)))

    # Unit weight on index 0, zero on the other two entries.
    # NOTE(review): presumably the start-state distribution -- confirm.
    d0 = immutable(np.array([1.0, 0.0, 0.0]))
Esempio n. 3
0
class BairdCounterexample(FiniteDynamics):
    """Finite dynamics with seven states and two actions.

    Each table below is passed through ``immutable`` and stored as a class
    attribute.
    """

    num_states = 7
    num_actions = 2

    # Table produced by a builder helper defined outside this class.
    K = immutable(_buildTransitionKernel())

    # Rs and T are two separate all-zero tables of shape
    # (num_states, num_actions, num_states) = (7, 2, 7).
    # NOTE(review): presumably "no reward" and "never terminates" --
    # confirm against FiniteDynamics.
    Rs = immutable(np.zeros((num_states, num_actions, num_states)))
    T = immutable(np.zeros((num_states, num_actions, num_states)))

    # Zero everywhere except a unit weight on the last index (6).
    # NOTE(review): presumably the start-state distribution -- confirm.
    d0 = immutable(np.array([0.0] * (num_states - 1) + [1.0]))
Esempio n. 4
0
class BoyanChain(FiniteDynamics):
    """Finite dynamics with thirteen states and two actions.

    Each table below is passed through ``immutable`` and stored as a class
    attribute.
    """

    num_states = 13
    num_actions = 2

    # All three tables come from builder helpers defined outside this class;
    # the helpers are invoked in the order K, Rs, T.
    K = immutable(_buildTransitionKernel())
    Rs = immutable(_buildRewardKernel())
    T = immutable(_buildTerminationKernel())

    # Length-13 float vector: unit weight on index 0, zeros elsewhere.
    # NOTE(review): presumably the start-state distribution -- confirm.
    d0 = immutable(np.array([1.0] + [0.0] * (num_states - 1)))
Esempio n. 5
0
def buildProblem(eps: float = 0.01, p: Optional[float] = None):
    """Assemble the policies, feature matrix and discount for the problem.

    Args:
        eps: Offset added to the second feature value (``1.05 + eps``); it
            also enters the default probability formula.
        p: Weight the behavior policy puts on its first entry. When ``None``,
            the counterexample probability is computed from ``eps``.

    Returns:
        A tuple ``(behavior, target, X, gamma)`` where

        * ``behavior(s)`` returns the same array ``[p, 1 - p]`` for any ``s``,
        * ``target(s)`` returns the same array ``[0.5, 0.5]`` for any ``s``,
        * ``X`` is the 2 x 1 array ``[[1], [1.05 + eps]]``,
        * ``gamma`` is ``0.99``.

        All three arrays are wrapped in ``immutable``.
    """
    # No probability supplied: fall back to the counterexample probability.
    if p is None:
        eps_sq = eps**2
        numerator = 2961 + 45240 * eps + 40400 * eps_sq
        denominator = 4141 + 84840 * eps + 40400 * eps_sq
        p = numerator / denominator

    mu = immutable(np.array([p, 1 - p]))

    def behavior(s: int):
        # The argument is ignored: the same array is returned every time.
        return mu

    pi = immutable(np.array([0.5, 0.5]))

    def target(s: int):
        # The argument is ignored: the same array is returned every time.
        return pi

    feature_rows = [[1], [1.05 + eps]]
    X = immutable(np.array(feature_rows))

    gamma = 0.99

    return behavior, target, X, gamma