Ejemplo n.º 1
0
    def __init__(self, env, gamma=.99):
        """Build the MDP description from ``env`` and initialise the MDP base.

        The grid returned by ``EnvMDP.to_grid_matrix(env)`` is scanned and
        every cell whose value is not ``None`` becomes a state keyed by its
        ``(x, y)`` position (``x`` is the column index, ``y`` the row index);
        the cell value itself is used as that state's reward.

        Args:
            env: Environment handed to the ``EnvMDP`` helpers; it must expose
                ``action_space.n``.
            gamma: Value forwarded unchanged to ``MDP.__init__`` as ``gamma``.
        """
        grid = EnvMDP.to_grid_matrix(env)
        self.rows = len(grid)
        self.cols = len(grid[0])
        self.grid = grid

        reward = {}
        states = set()
        # Walk the grid column by column; ``None`` cells are not states.
        for col in range(self.cols):
            for row in range(self.rows):
                cell = grid[row][col]
                if cell is None:
                    continue
                position = (col, row)
                states.add(position)
                reward[position] = cell

        self.states = states

        # Positions of the cells lettered b'GH' are passed on as terminals,
        # and the first b'S' position as the initial state.
        terminal_positions = EnvMDP.to_position(env, letter=b'GH')
        action_ids = list(range(env.action_space.n))
        transition_model = EnvMDP.to_transitions(env)
        start = EnvMDP.to_position(env, letter=b'S')[0]

        MDP.__init__(self,
                     start,
                     actlist=action_ids,
                     terminals=terminal_positions,
                     transitions=transition_model,
                     reward=reward,
                     states=states,
                     gamma=gamma)
	def __init__(self, grid, goalVals, discount=.99, tau=.01, epsilon=.001):
		"""Initialise the MDP base, store the grid inputs and run the setup steps.

		Args:
			grid: Stored unchanged on ``self.grid``.
			goalVals: Stored unchanged on ``self.goalVals``.
			discount: Forwarded to ``MDP.__init__`` as ``discount``.
			tau: Forwarded to ``MDP.__init__`` as ``tau``.
			epsilon: Forwarded to ``MDP.__init__`` as ``epsilon``.
		"""

		MDP.__init__(self, discount=discount, tau=tau, epsilon=epsilon)

		self.goalVals = goalVals
		self.grid = grid

		# The three calls below run in this fixed order at construction time.
		# NOTE(review): presumably they build the grid-world model, solve it
		# by value iteration and then derive a policy -- confirm against the
		# method definitions, which are not visible here.
		self.setGridWorld()
		self.valueIteration()
		self.extractPolicy()
Ejemplo n.º 3
0
    def __init__(self, grid, goalVals, discount=.99, tau=.01, epsilon=.001):
        """Initialise the MDP base, store the grid inputs and run the setup steps.

        Args:
            grid: Stored unchanged on ``self.grid``.
            goalVals: Stored unchanged on ``self.goalVals``.
            discount: Forwarded to ``MDP.__init__`` as ``discount``.
            tau: Forwarded to ``MDP.__init__`` as ``tau``.
            epsilon: Forwarded to ``MDP.__init__`` as ``epsilon``.
        """

        MDP.__init__(self, discount=discount, tau=tau, epsilon=epsilon)

        self.goalVals = goalVals
        self.grid = grid

        # The three calls below run in this fixed order at construction time.
        # NOTE(review): presumably they build the grid-world model, solve it
        # by value iteration and then derive a policy -- confirm against the
        # method definitions, which are not visible here.
        self.setGridWorld()
        self.valueIteration()
        self.extractPolicy()
Ejemplo n.º 4
0
    def __init__(self, desc=None, map_name="4x4", slip_chance=0.2):
        """Build a slippery grid-world MDP from a text map.

        Every map cell ``(row, col)`` is a state. For each cell that is not
        'G' or 'H', each of the four actions gets a distribution over next
        cells: the intended move has probability ``1 - slip_chance`` and each
        of the two perpendicular moves has ``slip_chance / 2``. Moves that
        would leave the grid keep the agent on the border cell. Any
        transition that lands on a 'G' cell is given reward 1.0; 'G' and 'H'
        cells have no outgoing transitions.

        Args:
            desc: Rectangular map as a sequence of equal-length strings made
                of the characters 'S', 'F', 'H' and 'G', containing exactly
                one 'S'. When ``None``, ``self.MAPS[map_name]`` is used.
            map_name: Key into ``self.MAPS``; only used when ``desc`` is
                ``None``.
            slip_chance: Total probability of moving perpendicular to the
                chosen action.

        Raises:
            ValueError: If both ``desc`` and ``map_name`` are ``None``.
            AssertionError: If the map does not contain exactly one 'S' or
                contains a character other than 'S', 'F', 'H', 'G'.
        """
        if desc is None and map_name is None:
            raise ValueError('Must provide either desc or map_name')
        elif desc is None:
            desc = self.MAPS[map_name]
        assert ''.join(desc).count('S') == 1, "this implementation supports having exactly one initial state"
        assert all(c in "SFHG" for c in ''.join(desc)), "all cells must be either of S, F, H or G"

        self.desc = desc = np.asarray(list(map(list, desc)), dtype='str')
        self.lastaction = None

        nrow, ncol = desc.shape
        states = [(i, j) for i in range(nrow) for j in range(ncol)]
        actions = ["left", "down", "right", "up"]

        # ``desc`` holds str cells, so it must be compared with the str 'S'.
        # Comparing with the bytes literal b'S' never matches, which made
        # argmax() return 0 and pinned the start to (0, 0) for every map.
        initial_state = states[(desc == 'S').ravel().argmax()]

        def move(row, col, movement):
            """Return the cell reached from (row, col), clamped to the grid."""
            if movement == 'left':
                col = max(col - 1, 0)
            elif movement == 'down':
                row = min(row + 1, nrow - 1)
            elif movement == 'right':
                col = min(col + 1, ncol - 1)
            elif movement == 'up':
                row = max(row - 1, 0)
            else:
                # Raising a bare string is itself a TypeError; raise a real
                # exception so the message reaches the caller.
                raise ValueError("invalid action")
            return (row, col)

        transition_probs = {s: {} for s in states}
        rewards = {s: {} for s in states}
        for (row, col) in states:
            # Goal and hole cells are left without outgoing transitions.
            if desc[row, col] in "GH":
                continue
            for action_i, action in enumerate(actions):
                next_probs = transition_probs[(row, col)][action] = {}
                next_rewards = rewards[(row, col)][action] = {}
                # Neighbours of an action in ``actions`` (cyclically) are the
                # two directions perpendicular to it.
                for movement_i in [(action_i - 1) % len(actions), action_i, (action_i + 1) % len(actions)]:
                    movement = actions[movement_i]
                    target = move(row, col, movement)
                    prob = (1. - slip_chance) if movement == action else (slip_chance / 2.)
                    if prob == 0:
                        continue
                    # Different movements can end in the same cell when the
                    # border clamps them, so their probabilities accumulate.
                    next_probs[target] = next_probs.get(target, 0.0) + prob
                    if desc[target] == 'G':
                        next_rewards[target] = 1.0

        MDP.__init__(self, transition_probs, rewards, initial_state)
Ejemplo n.º 5
0
 def __init__(self, rows, cols, definitiveness, initstate, terminals, obstacles, gamma=.9):
     """Store the grid parameters and initialise the MDP base.

     The state set handed to ``MDP.__init__`` contains every 1-based pair
     ``(x, y)`` with ``x`` in ``1..rows`` and ``y`` in ``1..cols``; cells
     listed in ``obstacles`` are not removed from it here. The action set
     is the four direction names.

     NOTE(review): ``x`` ranges over ``rows`` and ``y`` over ``cols`` --
     confirm this matches the coordinate convention used by callers.

     Args:
         rows: Stored on ``self.rows``; upper bound of the ``x`` coordinate.
         cols: Stored on ``self.cols``; upper bound of the ``y`` coordinate.
         definitiveness: Stored unchanged on ``self.definitiveness``.
         initstate: Stored unchanged on ``self.initstate``.
         terminals: Stored unchanged on ``self.terminals``.
         obstacles: Stored unchanged on ``self.obstacles``.
         gamma: Forwarded to ``MDP.__init__`` as its third argument.
     """
     self.rows = rows
     self.cols = cols
     self.definitiveness = definitiveness
     self.initstate = initstate
     self.terminals = terminals
     self.obstacles = obstacles

     # Same visiting order as a nested loop: y outermost, x innermost.
     all_cells = {
         (x, y)
         for y in range(1, self.cols + 1)
         for x in range(1, self.rows + 1)
     }
     moves = {'up', 'down', 'right', 'left'}
     MDP.__init__(self, all_cells, moves, gamma)