def __init__(self, nrows=8, ncols=8):
    """Set up an ``nrows`` x ``ncols`` grid MDP and record its border states.

    States are the integers ``0 .. nrows * ncols - 1``; the edge tests
    below treat them as laid out row by row (the first ``ncols`` states
    form the top edge, multiples of ``ncols`` form the left edge).

    Args:
        nrows: Number of grid rows.
        ncols: Number of grid columns.
    """
    self.nrows = nrows
    self.ncols = ncols
    self.nstates = nrows * ncols
    self.nactions = 4
    self.gamma = 0.9

    cells = range(self.nstates)
    last_col = self.ncols - 1
    first_bottom = (self.nrows - 1) * self.ncols

    # The four edges are built independently because they overlap:
    # a corner state belongs to two of them.
    self.left_edge = [c for c in cells if c % self.ncols == 0]
    self.right_edge = [c for c in cells if c % self.ncols == last_col]
    self.top_edge = [c for c in cells if 0 <= c < self.ncols]
    self.bottom_edge = [c for c in cells if first_bottom <= c <= self.nstates]

    SparseMDP.__init__(self, nstates=self.nstates, nactions=self.nactions)
def __init__(self, size = 10, rsize = 10):
    """Build the state space of (agent location, goal location) pairs.

    Each state is a pair ``(x, y)`` with ``x`` the agent location and
    ``y`` the goal location, both in ``range(size)``. A state is an end
    state when the two locations coincide.

    Args:
        size: Number of locations. Previously this argument was ignored
            and the size was always 10; it is now honoured.
        rsize: Accepted for interface compatibility; not used by this
            constructor.
    """
    self.size = size
    self.goal_index = 0
    # x is the agent location, y is the goal location
    self.state_names = [(x, y) for x in range(self.size) for y in range(self.size)]
    self.states = range(len(self.state_names))
    self.actions = [0, 1]  # left or right
    self.nstates = self.size ** 2
    self.nactions = 2
    # Indices of states in which the agent sits on the goal.
    self.endstates = [
        i for (i, s) in enumerate(self.state_names) if s[0] == s[1]
    ]
    SparseMDP.__init__(self, nstates = self.nstates, nactions = self.nactions)
def __init__(self, size=10, rsize=10):
    """Build the state space of (agent location, goal location) pairs.

    Each state is a pair ``(x, y)`` with ``x`` the agent location and
    ``y`` the goal location, both in ``range(size)``. A state is an end
    state when the two locations coincide.

    Args:
        size: Number of locations. Previously this argument was ignored
            and the size was always 10; it is now honoured.
        rsize: Accepted for interface compatibility; not used by this
            constructor.
    """
    self.size = size
    self.goal_index = 0
    self.state_names = [
        (x, y) for x in range(self.size) for y in range(self.size)
    ]  # x is the agent location, y is the goal location
    self.states = range(len(self.state_names))
    self.actions = [0, 1]  # left or right
    self.nstates = self.size**2
    self.nactions = 2
    # Indices of states in which the agent sits on the goal.
    self.endstates = [
        i for (i, s) in enumerate(self.state_names) if s[0] == s[1]
    ]
    SparseMDP.__init__(self, nstates=self.nstates, nactions=self.nactions)
def __init__(self, nrows=5, ncols=5, actions=None, walls=None, endstates=None):
    """Build a grid MDP whose states are the non-wall cells of the grid.

    Args:
        nrows: Number of grid rows.
        ncols: Number of grid columns.
        actions: Sized collection of allowed actions; defaults to
            ``range(8)``.
        walls: Cell coordinates (in whatever form ``self.coords``
            returns) that are excluded from the state space. Defaults to
            the 3x3 block ``(1,1) .. (3,3)``.
        endstates: State indices treated as end states; defaults to
            ``[0]``.

    The ``walls`` and ``endstates`` defaults are created fresh on every
    call. They used to be mutable default arguments stored directly on
    the instance, so mutating one instance's list silently changed the
    defaults of every later instance.
    """
    if walls is None:
        walls = [
            (1, 1), (1, 2), (1, 3),
            (2, 1), (2, 2), (2, 3),
            (3, 1), (3, 2), (3, 3),
        ]
    if endstates is None:
        endstates = [0]
    if actions is None:
        actions = range(8)

    self.nrows = nrows
    self.ncols = ncols
    self.walls = walls

    # Enumerate every cell, then drop the ones occupied by walls; the
    # surviving cells are renumbered consecutively from 0.
    all_cells = (self.coords(i) for i in range(self.nrows * self.ncols))
    grid = [cell for cell in all_cells if cell not in self.walls]
    self.states = dict(enumerate(grid))
    self.rstates = {cell: i for (i, cell) in enumerate(grid)}  # reverse lookup by grid coords

    self.allowed_actions = actions
    self.nstates = len(self.states)
    self.nactions = len(actions)
    self.endstates = endstates
    SparseMDP.__init__(self, nstates=self.nstates, nactions=self.nactions)
def __init__(self, nrows=5, ncols=5):
    """Build an MDP whose states are ordered pairs of grid cells.

    Every state is a pair ``(s, t)`` of cell coordinates produced by
    ``self.coords``. ``observations`` holds one row per state with the
    first two components of ``s`` followed by the first two of ``t``.
    End states are those pairs whose two cells agree in both of those
    components.

    Args:
        nrows: Number of grid rows.
        ncols: Number of grid columns.
    """
    self.nrows = nrows
    self.ncols = ncols

    cells = [self.coords(k) for k in range(self.nrows * self.ncols)]
    pairs = [(first, second) for first in cells for second in cells]

    self.states = dict(enumerate(pairs))
    self.rstates = {pair: idx for idx, pair in enumerate(pairs)}
    self.nstates = len(self.states)
    self.current = 0
    self.nactions = 8

    # One flat 4-component row per state, in state-index order.
    self.observations = np.array(
        [[first[0], first[1], second[0], second[1]] for first, second in pairs]
    )

    # Walk the reverse-lookup table so the order matches its key order.
    self.endstates = [
        idx
        for (first, second), idx in self.rstates.items()
        if first[0] == second[0] and first[1] == second[1]
    ]

    SparseMDP.__init__(self, nstates=self.nstates, nactions=self.nactions)