def __init__(self, nrows=8, ncols=8):
    """Set up an ``nrows`` x ``ncols`` grid and record its edge states.

    States are numbered ``0 .. nrows * ncols - 1`` row by row, so state
    ``x`` sits in column ``x % ncols`` and the first ``ncols`` states form
    the top row.

    Args:
        nrows: number of grid rows.
        ncols: number of grid columns.

    Attributes set here: ``nrows``, ``ncols``, ``nstates``, ``nactions``
    (always 4), ``gamma`` (0.9 -- presumably the discount factor; confirm
    against ``MDP``), and the four edge lists ``left_edge``,
    ``right_edge``, ``top_edge`` and ``bottom_edge``.
    """
    self.nrows = nrows
    self.ncols = ncols
    self.nstates = nrows * ncols
    self.nactions = 4
    self.gamma = 0.9

    states = range(self.nstates)
    # First state id of the last row; every id from here up to
    # nstates - 1 is on the bottom edge.  (The previous check used an
    # inclusive ``<= nstates`` upper bound, which is one past the last
    # valid id.)
    bottom_start = (nrows - 1) * ncols

    # Edges are not disjoint (corners belong to two of them), so each list
    # is built independently rather than with an if/elif chain.
    self.left_edge = [x for x in states if x % ncols == 0]
    self.right_edge = [x for x in states if x % ncols == ncols - 1]
    self.top_edge = [x for x in states if x < ncols]
    self.bottom_edge = [x for x in states if x >= bottom_start]

    # Base-class initialisation runs last, after all attributes above exist.
    MDP.__init__(self, nstates=self.nstates, nactions=self.nactions)
def __init__(self):
    """Initialise a fixed-size MDP with 4 states and 2 actions.

    Action encoding:
        0 == L
        1 == R
    """
    self.feature_cnt = 9
    self.gamma = 0.9
    MDP.__init__(self, nstates=4, nactions=2)
def __init__(self, nrows=5, ncols=5, walls=None, endstates=None):
    """Build a grid MDP whose states are the non-wall cells of the grid.

    Args:
        nrows: number of grid rows.
        ncols: number of grid columns.
        walls: collection of cell coordinates excluded from the state
            space.  ``None`` (the default) selects the 3x3 block
            ``(1,1) .. (3,3)``.
        endstates: list stored as ``self.endstates``; ``None`` (the
            default) selects ``[0]``.

    The defaults are created fresh on every call.  Previously they were
    list literals in the signature, so all instances constructed with
    defaults shared (and could mutate) the same ``walls`` / ``endstates``
    objects.
    """
    if walls is None:
        walls = [(1, 1), (1, 2), (1, 3),
                 (2, 1), (2, 2), (2, 3),
                 (3, 1), (3, 2), (3, 3)]
    if endstates is None:
        endstates = [0]

    self.nrows = nrows
    self.ncols = ncols
    self.walls = walls

    # self.coords is defined elsewhere in the class; it is assumed to map a
    # flat cell index to a value comparable with the entries of ``walls``.
    cells = [self.coords(i) for i in range(self.nrows * self.ncols)]
    open_cells = [cell for cell in cells if cell not in self.walls]

    # state index -> cell, and the reverse lookup cell -> state index
    self.states = dict(enumerate(open_cells))
    self.rstates = {cell: i for i, cell in enumerate(open_cells)}

    self.nstates = len(self.states)
    self.nactions = 8
    self.endstates = endstates

    MDP.__init__(self, nstates=self.nstates, nactions=self.nactions)
def reset(self):
    """Zero the step counter, then delegate to ``MDP.reset``.

    Returns:
        Whatever ``MDP.reset(self)`` returns.
    """
    self.step = 0
    result = MDP.reset(self)
    return result