Esempio n. 1
0
def neighbors(state: int, shape: Coords):
    """Return the set of states one step away from `state` on the grid.

    The state is converted to `(x, y)` coordinates, each of the four
    axis-aligned offsets is applied, and the results are converted back to
    state indices with `getState`. Because the result is a set, offsets that
    map to the same state index collapse into a single entry.

    NOTE(review): how out-of-range coordinates are treated is decided by
    `getState`, which is not visible here.
    """
    col, row = getCoords(state, shape)

    right = getState((col + 1, row), shape)
    left = getState((col - 1, row), shape)
    up = getState((col, row + 1), shape)
    down = getState((col, row - 1), shape)

    return {right, left, up, down}
Esempio n. 2
0
    def test_predecessor(self):
        """Check `predecessor` on a 5x4 grid for two (state, action) pairs."""
        grid = (5, 4)

        # Action 0 into (3, 3): expected predecessors are the state itself
        # followed by (3, 2), in that order.
        # NOTE(review): presumably action 0 is "up" and (3, 3) is on the top
        # edge, so bumping the wall keeps the agent in place -- confirm.
        target = getState((3, 3), grid)
        found = predecessor(target, 0, grid)
        expected = [target, getState((3, 2), grid)]
        self.assertEqual(found, expected)

        # Action 3 into (4, 3): no state is expected to lead here.
        target = getState((4, 3), grid)
        found = predecessor(target, 3, grid)
        self.assertEqual(found, [])
Esempio n. 3
0
def actions(state: int, next_state: int, shape: Coords):
    """List every action index that takes `state` to `next_state`.

    Action encoding used here: 0 = y + 1 (up), 1 = x + 1 (right),
    2 = y - 1 (down), 3 = x - 1 (left). Each candidate move is resolved
    through `getState` and compared with `next_state`; matching action
    indices are returned in ascending order. The list is empty when no
    single move reaches `next_state`.
    """
    col, row = getCoords(state, shape)

    # A bare `[]` gives numba nothing to infer the element type from; an
    # empty comprehension over ints produces an empty list typed as int.
    matching: List[int] = [i for i in range(0)]

    if getState((col, row + 1), shape) == next_state:
        matching.append(0)

    if getState((col + 1, row), shape) == next_state:
        matching.append(1)

    if getState((col, row - 1), shape) == next_state:
        matching.append(2)

    if getState((col - 1, row), shape) == next_state:
        matching.append(3)

    return matching
Esempio n. 4
0
def sample(_shape: Coords, costToGoal: bool = True, seed: int = 0):
    """Generate a random maze and return it as a `_WilsonMaze` subclass.

    Args:
        _shape: `(width, height)` of the grid; the maze has
            `width * height` states and 4 actions.
        costToGoal: when true every transition, including the one entering
            the goal, has reward -1. When false ordinary transitions have
            reward 0 and transitions entering the goal have reward +1.
        seed: seed for the `np.random.RandomState` driving all sampling.

    Returns:
        A new class (not an instance) named `WilsonMaze` whose attributes
        hold the grid shape, state/action counts, the transition kernel `K`,
        rewards `Rs`, termination indicators `T` (each shaped
        `(states, 4, states)`), and the start distribution `d0`.

    NOTE(review): the class names suggest Wilson's algorithm; the actual path
    sampling lives in `_samplePath`, which is not visible here.
    """
    rng = np.random.RandomState(seed)

    width, height = _shape
    n_states = width * height

    # dense (state, action, next_state) tables describing the environment
    kernel = np.zeros((n_states, 4, n_states))
    rewards = np.zeros((n_states, 4, n_states))
    terminations = np.zeros((n_states, 4, n_states))
    start_dist = np.zeros(n_states)

    # states the maze builder has not reached yet; initially all of them
    pending = set(range(n_states))

    # seed the maze with one visited state; every sampled path is expected
    # to eventually connect back to the visited region
    seed_cell = Random.choice(pending, rng)
    pending.remove(seed_cell)

    # the goal sits in the top-right corner of the grid
    goal = getState((width - 1, height - 1), _shape)
    terminations[goal, :, goal] = 1

    # reward for an ordinary step, and for the step that enters the goal
    step_reward = -1 if costToGoal else 0
    goal_reward = -1 if costToGoal else 1

    def _connect(src, dst):
        # Open every action leading src -> dst. There may be more than one
        # because bumping into a wall can leave the agent on the same cell.
        for a in actions(src, dst, _shape):
            kernel[src, a, dst] = 1
            if dst == goal:
                terminations[src, a, dst] = 1
                rewards[src, a, dst] = goal_reward
            else:
                rewards[src, a, dst] = step_reward

    # keep carving paths until no state is left unvisited
    while pending:
        previous = None
        for cell in _samplePath(pending, _shape, rng):
            if previous is not None:
                pending.remove(previous)

                # connect both directions so the agent can also walk back
                _connect(previous, cell)
                _connect(cell, previous)

            previous = cell

    # any (state, action) pair that leads nowhere hits a wall: make it a
    # self-transition with the ordinary step reward
    stuck_states, stuck_actions = np.nonzero(kernel.sum(axis=2) == 0)
    kernel[stuck_states, stuck_actions, stuck_states] = 1
    rewards[stuck_states, stuck_actions, stuck_states] = step_reward

    # the agent always starts in the bottom-left corner
    origin = getState((0, 0), _shape)
    start_dist[origin] = 1

    class WilsonMaze(_WilsonMaze):
        shape = _shape

        num_states = n_states
        num_actions = 4

        K = kernel
        Rs = rewards
        T = terminations
        d0 = start_dist

    return WilsonMaze
Esempio n. 5
0
 def getState(cls, coords: Coords):
     """Convert `coords` to a state index using this class's `shape`."""
     # Inside the method body `getState` resolves to the module-level
     # function, not to this method, so this is a delegation, not recursion.
     grid_shape = cls.shape
     return getState(coords, grid_shape)