def __init__(self, grid, goalVals, discount=.99, tau=.01, epsilon=.001):
    MDP.__init__(self, discount=discount, tau=tau, epsilon=epsilon)
    self.goalVals = goalVals
    self.grid = grid
    self.setGridWorld()    # build states, transitions, and rewards from the grid
    self.valueIteration()  # solve for the optimal value function
    self.extractPolicy()   # derive the greedy policy from the converged values
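This constructor chains setGridWorld, valueIteration, and extractPolicy, whose bodies are not shown here. The following is a minimal, self-contained sketch of what that pipeline computes, under assumed conventions (deterministic moves, per-state rewards read from the grid, AIMA-style V(s) = R(s) + gamma * max_a V(s')); the toy grid and helper names are hypothetical, not taken from the class.

# Hypothetical sketch of the valueIteration / extractPolicy pipeline.
GRID = [
    [-0.04, -0.04, 1.0],   # 1.0 marks the goal cell
    [-0.04, None, -0.04],  # None marks a wall
    [-0.04, -0.04, -0.04],
]
TERMINALS = {(0, 2)}
ACTIONS = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # (row, col) deltas: up, down, left, right
DISCOUNT, EPSILON = 0.99, 0.001

states = {(r, c) for r, row in enumerate(GRID)
          for c, cell in enumerate(row) if cell is not None}

def step(s, a):
    """Deterministic move; bumping a wall or the edge leaves the state unchanged."""
    nxt = (s[0] + a[0], s[1] + a[1])
    return nxt if nxt in states else s

# Value iteration: sweep until the largest update falls below EPSILON.
V = {s: 0.0 for s in states}
while True:
    delta = 0.0
    for s in states:
        new = GRID[s[0]][s[1]] if s in TERMINALS else \
              GRID[s[0]][s[1]] + DISCOUNT * max(V[step(s, a)] for a in ACTIONS)
        delta = max(delta, abs(new - V[s]))
        V[s] = new
    if delta < EPSILON:
        break

# Policy extraction: act greedily with respect to the converged values.
policy = {s: max(ACTIONS, key=lambda a: V[step(s, a)])
          for s in states if s not in TERMINALS}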
def __init__(self, grid, terminals, init=(0, 0), gamma=.9):
    MDP.__init__(self, init, actlist=orientations, terminals=terminals, gamma=gamma)
    grid.reverse()  # because we want row 0 on bottom, not on top
    self.grid = grid
    self.rows = len(grid)
    self.cols = len(grid[0])
    for x in range(self.cols):
        for y in range(self.rows):
            self.reward[x, y] = grid[y][x]  # each reward is from the grid
            if grid[y][x] is not None:
                self.states.add((x, y))  # each state is a tuple of indices
def __init__(self, grid, terminals, init=(0, 0), gamma=.9):
    MDP.__init__(self, init, actlist=orientations, terminals=terminals, gamma=gamma)
    self.grid = grid
    self.rows = len(grid)
    self.cols = len(grid[0])
    for x in range(self.cols):
        for y in range(self.rows):
            self.reward[y, x] = grid[y][x]  # rewards keyed by (row, col)
            if self.state_check((y, x)):    # only legal cells become states
                self.states.add((y, x))
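The two GridMDP variants differ only in indexing convention: the first reverses the grid and keys reward and states by (x, y) (column, row counted from the bottom), while the second keeps the grid as given and keys by (y, x) (row, column counted from the top). A quick illustrative check on an assumed toy grid (not from the source):

# Illustrative only: the same written cell under the two conventions.
grid = [[1, 2],
        [3, 4]]  # grid[0] is the top row as written

# Variant 1 (reversed grid, (x, y) keys): row 0 ends up on the bottom,
# so the cell written as grid[0][1] (value 2) is addressed as (x=1, y=1).
rev = list(reversed(grid))
reward_xy = {(x, y): rev[y][x] for y in range(2) for x in range(2)}
assert reward_xy[(1, 1)] == 2

# Variant 2 ((y, x) keys, grid as given): the same cell is (y=0, x=1).
reward_yx = {(y, x): grid[y][x] for y in range(2) for x in range(2)}
assert reward_yx[(0, 1)] == 2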