Example #1
    def __init__(self, grid, goalVals, discount=.99, tau=.01, epsilon=.001):
        # Initialize the base MDP with the discount, tau, and epsilon parameters.
        MDP.__init__(self, discount=discount, tau=tau, epsilon=epsilon)

        self.goalVals = goalVals
        self.grid = grid

        # Build the grid-world model, then solve it up front so the policy
        # is already available once the object is constructed.
        self.setGridWorld()
        self.valueIteration()
        self.extractPolicy()
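
A minimal usage sketch for this constructor (the class name GridWorld and the shapes of grid and goalVals are assumptions; only the constructor is shown above):

# Hypothetical usage -- class name and argument shapes are assumed.
grid = [['.', '.', '.'],
        ['.', 'x', '.'],
        ['.', '.', 'G']]   # assumed layout with one goal cell
goalVals = [10.0]          # assumed: one reward value per goal
world = GridWorld(grid, goalVals, discount=.99, tau=.01, epsilon=.001)
# The policy is available right after construction, because __init__
# already calls valueIteration() and extractPolicy().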
Example #3
    def __init__(self, grid, terminals, init=(0, 0), gamma=.9):
        MDP.__init__(self, init, actlist=orientations, terminals=terminals, gamma=gamma)
        grid.reverse()  ## because we want row 0 on bottom, not on top
        self.grid = grid
        self.rows = len(grid)
        self.cols = len(grid[0])
        for x in range(self.cols):
            for y in range(self.rows):
                self.reward[x, y] = grid[y][x]  # each reward is from the grid
                if grid[y][x] is not None:
                    self.states.add((x, y))     # each state is a tuple of indices
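
If this is a GridMDP-style subclass (the class name GridMDP is an assumption here), a minimal usage sketch looks like this; None marks cells that are not valid states:

# Hypothetical usage -- the class name GridMDP is assumed from context.
mdp = GridMDP([[-0.04, -0.04, -0.04, +1],
               [-0.04, None,  -0.04, -1],
               [-0.04, -0.04, -0.04, -0.04]],
              terminals=[(3, 2), (3, 1)])
# Because grid.reverse() is called, (0, 0) is the bottom-left cell, and
# states/rewards are keyed as (x, y) = (column, row counted from the bottom).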
Example #4
    def __init__(self, grid, terminals, init=(0, 0), gamma=.9):
        MDP.__init__(self,
                     init,
                     actlist=orientations,
                     terminals=terminals,
                     gamma=gamma)

        self.grid = grid
        self.rows = len(grid)
        self.cols = len(grid[0])

        # print(self.rows,self.cols)

        # Rewards and states are keyed as (row, col); only cells that pass
        # state_check() are added to the state set.
        for x in range(self.cols):
            for y in range(self.rows):
                self.reward[y, x] = grid[y][x]
                if self.state_check((y, x)):
                    self.states.add((y, x))
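
Unlike Example #3, this variant does not reverse the grid and keys rewards and states as (row, col), i.e. (y, x), filtering cells through state_check() (defined elsewhere in the class). A minimal usage sketch under the same assumptions (the class name, the grid values, the terminals, and a dict-style reward attribute are all assumed):

# Hypothetical usage -- class name, grid values, and terminals are assumptions.
mdp = GridMDP([[-0.04, -0.04, -0.04, +1],
               [-0.04, None,  -0.04, -1],
               [-0.04, -0.04, -0.04, -0.04]],
              terminals=[(0, 3), (1, 3)])
print(mdp.reward[0, 0])   # keyed as (row, col): the top-left cell, -0.04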