Esempio n. 1
0
    def __init__(self, nrows = 8, ncols = 8):
        """Set up an ``nrows`` x ``ncols`` grid and record the states on each edge.

        States are numbered ``0 .. nrows * ncols - 1`` row by row: state ``x``
        lies in column ``x % ncols`` and the first ``ncols`` states form the
        top row.

        Args:
            nrows: number of rows in the grid.
            ncols: number of columns in the grid.
        """
        self.nrows = nrows
        self.ncols = ncols
        self.nstates = nrows * ncols
        self.nactions = 4
        self.gamma = 0.9  # presumably the discount factor -- confirm against SparseMDP

        # The edges are not disjoint (corners belong to two of them), so each
        # edge is filtered independently instead of through an if/elif chain.
        cells = range(self.nstates)
        first_bottom = (self.nrows - 1) * self.ncols  # first state of the last row

        self.left_edge = [x for x in cells if x % self.ncols == 0]
        self.right_edge = [x for x in cells if x % self.ncols == self.ncols - 1]
        self.top_edge = [x for x in cells if x < self.ncols]
        # Every x is already < nstates, so only the lower bound needs checking.
        self.bottom_edge = [x for x in cells if x >= first_bottom]

        SparseMDP.__init__(self, nstates = self.nstates, nactions = self.nactions)
Esempio n. 2
0
 def __init__(self, size = 10, rsize = 10):
     """Build a state space of (agent location, goal location) pairs.

     Args:
         size: number of distinct locations; the state space holds
             ``size ** 2`` (agent, goal) pairs.
         rsize: accepted for interface compatibility; this constructor
             does not use it.
     """
     # Honour the caller's ``size`` (it used to be overwritten with 10).
     self.size = size
     self.goal_index = 0
     # In each (x, y) pair, x is the agent location and y is the goal location.
     self.state_names = [(x, y) for x in range(self.size) for y in range(self.size)]
     self.states = range(len(self.state_names))
     self.actions = [0, 1] # left or right
     self.nstates = self.size ** 2
     self.nactions = len(self.actions)
     # End states are those where the agent location equals the goal location.
     self.endstates = [i for (i, s) in enumerate(self.state_names) if s[0] == s[1]]
     SparseMDP.__init__(self, nstates = self.nstates, nactions = self.nactions)
Esempio n. 3
0
    def __init__(self, size=10, rsize=10):
        """Build a state space of (agent location, goal location) pairs.

        Args:
            size: number of distinct locations; the state space holds
                ``size ** 2`` (agent, goal) pairs.
            rsize: accepted for interface compatibility; this constructor
                does not use it.
        """
        # Honour the caller's ``size`` (it used to be overwritten with 10).
        self.size = size
        self.goal_index = 0
        # In each (x, y) pair, x is the agent location and y is the goal location.
        self.state_names = [
            (x, y) for x in range(self.size) for y in range(self.size)
        ]
        self.states = range(len(self.state_names))
        self.actions = [0, 1]  # left or right
        self.nstates = self.size**2
        self.nactions = len(self.actions)
        # End states are those where the agent location equals the goal location.
        self.endstates = [
            i for (i, s) in enumerate(self.state_names) if s[0] == s[1]
        ]
        SparseMDP.__init__(self, nstates=self.nstates, nactions=self.nactions)
Esempio n. 4
0
    def __init__(self, nrows = 5, ncols = 5, actions = None, walls = None, endstates = None):
        """Build a grid whose wall cells are removed from the state space.

        Args:
            nrows: number of rows in the grid.
            ncols: number of columns in the grid.
            actions: allowed actions; defaults to ``range(8)``.
            walls: coordinates (as returned by ``self.coords``) to exclude
                from the state space. Defaults to the nine cells with both
                coordinates in 1..3.
            endstates: list of end-state indices; defaults to ``[0]``.
        """
        # Defaults are built per call so instances never share (and cannot
        # accidentally mutate) a single list object.
        if walls is None:
            walls = [(a, b) for a in (1, 2, 3) for b in (1, 2, 3)]
        if endstates is None:
            endstates = [0]
        if actions is None:
            actions = range(8)

        self.nrows = nrows
        self.ncols = ncols
        self.walls = walls

        # Enumerate every cell, then drop the walls; the survivors are
        # renumbered consecutively from 0.
        cells = [self.coords(i) for i in range(self.nrows * self.ncols)]
        grid = [cell for cell in cells if cell not in self.walls]
        self.states = dict(enumerate(grid))
        self.rstates = {cell: idx for idx, cell in enumerate(grid)} # reverse lookup by grid coords

        self.allowed_actions = actions
        self.nstates = len(self.states)
        self.nactions = len(actions)
        self.endstates = endstates

        SparseMDP.__init__(self, nstates = self.nstates, nactions = self.nactions)
Esempio n. 5
0
    def __init__(self, nrows=5, ncols=5):
        """Build a state space made of ordered pairs of grid cells.

        Each state is a pair ``(s, t)`` of coordinates produced by
        ``self.coords``. ``observations`` holds one row
        ``[s[0], s[1], t[0], t[1]]`` per state, and ``endstates`` lists the
        indices of the states whose two cells share both coordinates.

        Args:
            nrows: number of rows in the grid.
            ncols: number of columns in the grid.
        """
        self.nrows = nrows
        self.ncols = ncols

        cells = [self.coords(k) for k in range(self.nrows * self.ncols)]
        pairs = [(a, b) for a in cells for b in cells]

        self.states = dict(enumerate(pairs))
        self.rstates = {pair: idx for idx, pair in enumerate(pairs)}
        self.nstates = len(self.states)
        self.current = 0
        self.nactions = 8

        # One flattened row of four coordinates per state, in state order.
        self.observations = np.array(
            [[a[0], a[1], b[0], b[1]] for (a, b) in pairs]
        )

        # A state is an end state when both cells of the pair coincide.
        self.endstates = [
            idx
            for (a, b), idx in self.rstates.items()
            if a[0] == b[0] and a[1] == b[1]
        ]

        SparseMDP.__init__(self, nstates = self.nstates, nactions = self.nactions)