Example #1
Feature representations used in
TODO: cite <Sutton et al. 2009>
"""
def invertedFeatures(n: int):
    # complement of the tabular features (hence "inverted"): 1 everywhere
    # except the state's own index, then row-normalized
    m = 1 - tabularFeatures(n)
    return _normRows(m)

@try2jit
def dependentFeatures(n: int):
    # the two loops below fill nfeats + (nfeats - 1) = 2*floor(n/2) + 1 rows,
    # so this construction assumes n is odd (e.g. the 5-state random walk)
    nfeats = int(np.floor(n / 2) + 1)
    m = np.zeros((n, nfeats))

    idx = 0
    for i in range(nfeats):
        m[idx, 0: i + 1] = 1
        idx += 1

    for i in range(nfeats - 1, 0, -1):
        m[idx, -i:] = 1
        idx += 1

    return _normRows(m)

def tabularFeatures(n: int):
    return np.eye(n)

addToCategory('ope', RandomWalk)
addToCategory('random-walk', RandomWalk)
addToCategory('finite-dynamics', RandomWalk)
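
A minimal usage sketch for these feature maps. The _normRows helper is not shown in this snippet, so the L2 row-normalization below is an assumption; everything else follows directly from the functions above.

import numpy as np

# hypothetical stand-in for _normRows (assumption: unit L2 norm per row,
# guarding against all-zero rows)
def _normRows(m: np.ndarray) -> np.ndarray:
    norms = np.linalg.norm(m, axis=1, keepdims=True)
    return m / np.where(norms == 0, 1, norms)

phi_tab = np.eye(5)               # tabularFeatures(5): one-hot per state
phi_inv = _normRows(1 - phi_tab)  # invertedFeatures(5)
print(phi_inv[0])                 # [0.  0.5 0.5 0.5 0.5]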
Example #2

class BECounterexample(FiniteDynamics):
    num_states = 3
    num_actions = 2

    K = immutable(_buildTransitionKernel())
    Rs = immutable(_buildRewardKernel())

    # termination kernel is all zeros: this is a continuing problem
    T = immutable(np.zeros((3, 2, 3)))
    # always start in state 0
    d0 = immutable(np.array([1., 0, 0]))


# some utility functions to encode other important parts of the problem spec
# not necessarily environment specific, but this is as good a place as any to store them
def behaviorPolicy(s: int):
    return np.array([0.5, 0.5])


def representationMatrix():
    return np.array([
        [1., 0],
        [0, 1],
        [0, 1],
    ])


addToCategory('ope', BECounterexample)
addToCategory('ope-counterexample', BECounterexample)
addToCategory('finite-dynamics', BECounterexample)
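
A short sketch of how these pieces combine for off-policy evaluation; it uses only the functions defined above.

import numpy as np

rng = np.random.default_rng(0)

X = representationMatrix()              # states 1 and 2 are aliased: both map to [0, 1]
s = 0
a = rng.choice(2, p=behaviorPolicy(s))  # sample from the 50/50 behavior policy
x = X[s]                                # the features a linear learner observes in s
print(a, x)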
Example #3
    def start(self):
        # multiplicatively perturb each component of the start state by normal noise centered at 1
        eps = 1
        if self.random_start > 0:
            eps = self.start_rng.normal(1, self.random_start, size=6)

        # start in the unhealthy stable state
        start = np.array([163573, 5., 11945, 46, 63919, 24]) * eps
        self._state = start

        return _transform(start)

    def step(self, action: int):
        sp = _nextState(self._state, action, self.dt, self.actionEffects)
        r = HIVTreatment.reward(self._state, action, sp)
        t = HIVTreatment.terminal(self._state, action, sp)

        self._state = sp

        return (r, _transform(sp), t)

    def setState(self, state: np.ndarray):
        self._state = state.copy()

    def copy(self):
        m = HIVTreatment(self._seed)
        m._state = self._state.copy()
        return m

addToCategory('classic-control', HIVTreatment)
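
A hedged episode-loop sketch. It assumes the start/step interface shown above, the seed-only constructor implied by copy(), and the usual four on/off drug combinations for the action set (the action count is not visible in this snippet).

import numpy as np

env = HIVTreatment(0)
obs = env.start()

rng = np.random.default_rng(0)
for _ in range(200):
    a = int(rng.integers(4))   # 4 treatment actions (assumption)
    r, obs, t = env.step(a)
    if t:
        obs = env.start()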
Example #4
        # Split top/bottom left
        WallState((0, 5)),
        WallState((2, 5)),
        WallState((3, 5)),
        WallState((4, 5)),
        WallState((5, 5)),
        # Split top left/right
        WallState((5, 10)),
        WallState((5, 9)),
        WallState((5, 7)),
        WallState((5, 6)),
        # Split bottom left/right
        WallState((5, 4)),
        WallState((5, 3)),
        WallState((5, 2)),
        WallState((5, 0)),
        # Split top/bottom right
        WallState((6, 4)),
        WallState((7, 4)),
        WallState((9, 4)),
        WallState((10, 4)),
    ])

    return fourRoomsBuilder.build()


FourRooms = build()

addToCategory('gridworld', FourRooms)
addToCategory('finite-dynamics', FourRooms)
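
FourRooms is built once at import time and registered as 'finite-dynamics', so a transition kernel should be available for inspection. The K attribute below is an assumption carried over from the FiniteDynamics examples elsewhere in this collection.

import numpy as np

K = np.asarray(FourRooms.K)      # (states, actions, states), assumed attribute
s, a = 0, 0
print(np.flatnonzero(K[s, a]))   # successor states reachable from (s, a)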
Example #5
        sp = self.nextState(self._state, action)
        r = self.reward(self._state, action, sp)
        t = self.terminal(self._state, action, sp)

        self._state = sp

        return (r, _transform(sp), t)

    def setState(self, state: np.ndarray):
        self._state = state.copy()

    def copy(self):
        m = Acrobot(randomize=self.randomize, seed=self._seed)
        m._state = self._state.copy()
        m.physical_constants = self.physical_constants
        m.per_step_constants = self.per_step_constants

        # copy the derivative function so it matches the physics constants assigned above
        m._dsdt = self._dsdt

        return m


class StochasticAcrobot(Acrobot):
    def __init__(self, seed: int = 0):
        super().__init__(randomize=True, seed=seed)


addToCategory('classic-control', Acrobot)
addToCategory('stochastic', StochasticAcrobot)
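
A brief sketch of why copy() duplicates the state array: simulated rollouts can branch from the current state without disturbing the original environment. The start() call is an assumption, mirroring the interface of the other environments here.

env = StochasticAcrobot(seed=0)
env.start()                 # assumed start/step interface

sim = env.copy()            # independent state copy; shares physics constants
r, sp, t = sim.step(1)      # advancing the copy leaves env's own state untouched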
Example #6
        m.physical_constants = self.physical_constants
        m.per_step_constants = self.per_step_constants

        # the physics constants are assigned _after_ the derivative function was
        # precomputed, so replace it with the equations matching these constants
        m._dsdt = self._dsdt

        return m

class StochasticCartpole(Cartpole):
    def __init__(self, seed: int = 0):
        super().__init__(randomize=True, seed=seed)

class ContinuousActionCartpole(Cartpole):
    def nextState(self, s: np.ndarray, force: float):
        # get per-step constants
        dt = self.per_step_constants['dt'].sample(self.rng)

        force = np.clip(force, -12, 12)
        sa = np.append(s, force)
        spa = euler(self._dsdt, sa, np.array([0, dt]))

        # only need the last result of the integration
        spa = spa[-1]
        sp = spa[:-1]

        return sp

addToCategory('classic-control', Cartpole)
addToCategory('stochastic', StochasticCartpole)
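
A usage sketch for the continuous-action variant. The constructor signature is inferred from StochasticCartpole's super() call, and the 4-component state layout is the standard cartpole convention, not confirmed by this snippet.

import numpy as np

env = ContinuousActionCartpole(randomize=False, seed=0)

s = np.zeros(4)              # [x, x_dot, theta, theta_dot] (standard layout, assumption)
sp = env.nextState(s, 30.0)  # any float force; clipped internally to [-12, 12]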
Example #7
            # typically there is only one start state, but if multiple are
            # ever defined, transition to each with uniform probability
            for start in starts:
                K[s, a, start] = 1.0 / len(starts)


def build(shape: Coords = (12, 4)):
    cliffWorldBuilder = GridWorldBuilder(shape)

    # start in bottom left
    cliffWorldBuilder.addElement(StartState((0, 0)))

    # end in bottom right without additional reward
    cliffWorldBuilder.addElement(GoalState((shape[0] - 1, 0), -1))

    # with a cliff in every state in-between
    cliffWorldBuilder.addElements(
        [Cliff((x, 0)) for x in range(1, shape[0] - 1)])

    cliffWorldBuilder.costToGoal = True

    return cliffWorldBuilder.build()


CliffWorld = build()

addToCategory('gridworld', CliffWorld)
addToCategory('finite-dynamics', CliffWorld)
addToCategory('sutton-barto', CliffWorld)
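
Because build is parameterized by shape, variants come for free: the start, goal, and cliff placements all scale with the first dimension.

# a shorter 6x4 cliff world with the same layout logic
SmallCliffWorld = build(shape=(6, 4))
addToCategory('gridworld', SmallCliffWorld)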
Example #8
    d0 = immutable(np.array([1.] + [0.] * 12))

# some utility functions to encode other important parts of the problem spec
# not necessarily environment specific, but this is as good a place as any to store them
def behaviorPolicy(s: int):
    if s <= 10:
        return np.array([0.5, 0.5])

    return np.array([1.0, 0])

def representationMatrix():
    return np.array([
        [1,    0,    0,    0   ],  # noqa: E241
        [0.75, 0.25, 0,    0   ],  # noqa: E241
        [0.5,  0.5,  0,    0   ],  # noqa: E241
        [0.25, 0.75, 0,    0   ],  # noqa: E241
        [0,    1,    0,    0   ],  # noqa: E241
        [0,    0.75, 0.25, 0   ],  # noqa: E241
        [0,    0.5,  0.5,  0   ],  # noqa: E241
        [0,    0.25, 0.75, 0   ],  # noqa: E241
        [0,    0,    1,    0   ],  # noqa: E241
        [0,    0,    0.75, 0.25],  # noqa: E241
        [0,    0,    0.5,  0.5 ],  # noqa: E241
        [0,    0,    0.25, 0.75],  # noqa: E241
        [0,    0,    0,    1   ],  # noqa: E241
    ])

addToCategory('ope', BoyanChain)
addToCategory('random-walk', BoyanChain)
addToCategory('finite-dynamics', BoyanChain)
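
A quick check of the structure these definitions encode, using only the functions above: each feature row is a convex combination over four anchor states, and the behavior policy is random until state 10, then forced.

import numpy as np

X = representationMatrix()
assert X.shape == (13, 4)               # 13 states, 4 interpolating features
assert np.allclose(X.sum(axis=1), 1.0)  # every row is a convex combination

print(behaviorPolicy(3))    # [0.5 0.5]
print(behaviorPolicy(12))   # [1. 0.]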
Example #9
        m.per_step_constants = self.per_step_constants

        m._dsdt = self._dsdt

        return m


class GymMountainCar(MountainCar):
    def __init__(self, seed: int = 0):
        super().__init__(randomize=False, seed=seed)

    def nextState(self, s: np.ndarray, a: int):
        return _nextState(s, a)


class StochasticMountainCar(MountainCar):
    def __init__(self, seed: int = 0):
        super().__init__(randomize=True, seed=seed)


class ContinuousActionMountainCar(MountainCar):
    def nextState(self, s: np.ndarray, a: float):
        force = np.clip(a, -3, 3)

        return self._integrate(s, force)


addToCategory('classic-control', MountainCar)
addToCategory('sutton-barto', GymMountainCar)
addToCategory('stochastic', StochasticMountainCar)
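
A hedged episode sketch for the deterministic Gym-style variant. The start/step interface and the three discrete actions are assumptions carried over from the standard mountain-car formulation.

import numpy as np

env = GymMountainCar(seed=0)
obs = env.start()           # assumed interface, as in the other snippets

rng = np.random.default_rng(0)
for _ in range(1000):
    r, obs, t = env.step(int(rng.integers(3)))  # reverse, coast, forward (assumption)
    if t:
        break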