Example #1
 def __init__(self,
              width=9,
              height=9,
              init_loc=(1, 1),
              goal_locs=[(9, 9)],
              gamma=0.99,
              slip_prob=0.00,
              name="four_room",
              is_goal_terminal=True,
              rand_init=False,
              step_cost=0.0):
     '''
     Args:
         height (int)
         width (int)
         init_loc (tuple: (int, int))
         goal_locs (list of tuples: [(int, int)...])
     '''
     GridWorldMDP.__init__(self,
                           width,
                           height,
                           init_loc,
                           goal_locs=goal_locs,
                           walls=self._compute_walls(width, height),
                           gamma=gamma,
                           slip_prob=slip_prob,
                           name=name,
                           is_goal_terminal=is_goal_terminal,
                           rand_init=rand_init,
                           step_cost=step_cost)
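A minimal usage sketch for the constructor above, assuming it belongs to simple_rl's FourRoomMDP (the class name and import path are assumptions, not shown in the snippet); the accessor and stepping methods used here appear verbatim in the later examples.

from simple_rl.tasks import FourRoomMDP  # assumed import path

mdp = FourRoomMDP(width=9, height=9, init_loc=(1, 1), goal_locs=[(9, 9)])
print(mdp.get_init_state())
for action in mdp.get_actions():
    # execute_agent_action advances the MDP's current state and returns (reward, next_state).
    reward, next_state = mdp.execute_agent_action(action)
    print(action, reward, next_state)
mdp.reset()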
Example #2
 def __init__(self,
              gamma=0.99,
              slip_prob=0.00,
              name="puddle",
              puddle_rects=[(0.1, 0.8, 0.5, 0.7), (0.4, 0.7, 0.5, 0.4)],
              goal_locs=[[1.0, 1.0]],
              is_goal_terminal=True,
              rand_init=False,
              step_cost=0.0):
     '''
     Args:
         gamma (float)
         slip_prob (float)
         name (str)
         puddle_rects (list of tuples): [(top_left_x, top_left_y, bot_right_x, bot_right_y), ...]
         is_goal_terminal (bool)
         rand_init (bool)
         step_cost (float)
     '''
     self.delta = 0.05
     self.puddle_rects = puddle_rects
     GridWorldMDP.__init__(self,
                           width=1.0,
                           height=1.0,
                           init_loc=[0.25, 0.6],
                           goal_locs=goal_locs,
                           gamma=gamma,
                           name=name,
                           is_goal_terminal=is_goal_terminal,
                           rand_init=rand_init,
                           step_cost=step_cost)
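Unlike the discrete four-room variant, the puddle constructor above works on a continuous unit square, with self.delta as the per-step displacement. A short hedged sketch, assuming this is simple_rl's PuddleMDP (class name and import path are assumptions):

from simple_rl.tasks import PuddleMDP  # assumed import path

mdp = PuddleMDP(gamma=0.99, step_cost=0.01)
# Actions displace the agent by mdp.delta inside the unit square;
# stepping returns (reward, next_state) as in the other examples.
reward, next_state = mdp.execute_agent_action(mdp.get_actions()[0])
print(reward, next_state)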
Example #3
 def __init__(self, gamma=0.99, slip_prob=0.00, name="puddle", is_goal_terminal=True, rand_init=False):
     '''
     Args:
         gamma (float)
         slip_prob (float)
         name (str)
         is_goal_terminal (bool)
         rand_init (bool)
     '''
     GridWorldMDP.__init__(self, width=1.0, height=1.0, init_loc=[0.0,0.0], goal_locs=[1.0,1.0], gamma=gamma, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init)
Example #4
 def __init__(self, width=9, height=9, init_loc=(1,1), goal_locs=[(9,9)], gamma=0.99):
     '''
     Args:
         height (int)
         width (int)
         init_loc (tuple: (int, int))
         goal_locs (list of tuples: [(int, int)...])
     '''
     GridWorldMDP.__init__(self, width, height, init_loc, goal_locs, walls=self._compute_walls(width, height), gamma=gamma)
Example #5
 def __init__(self, gamma=0.99):
     '''
     Args:
         gamma (float): discount factor of underlying MDP
     '''
     self.mdp = GridWorldMDP(gamma=gamma, goal_locs=[(4, 3)], slip_prob=0.0)
     self.actions = self.mdp.get_actions()
     self.num_actions = len(self.actions)
     self.init_state = self.mdp.init_state
     self.goal_locs = self.mdp.goal_locs
Example #6
 def __init__(self, gamma=0.99, slip_prob=0.00, name="puddle", puddle_rects=[(0.1, 0.8, 0.5, 0.7), (0.4, 0.7, 0.5, 0.4)], goal_locs=[[1.0, 1.0]], is_goal_terminal=True, rand_init=False, step_cost=0.0):
     '''
     Args:
         gamma (float)
         slip_prob (float)
         name (str)
         puddle_rects (list of tuples): [(top_left_x, top_left_y, bot_right_x, bot_right_y), ...]
         is_goal_terminal (bool)
         rand_init (bool)
         step_cost (float)
     '''
     self.delta = 0.05
     self.puddle_rects = puddle_rects
     GridWorldMDP.__init__(self, width=1.0, height=1.0, init_loc=[0.25, 0.6], goal_locs=goal_locs, gamma=gamma, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init, step_cost=step_cost)
Example #7
 def __init__(self, width=9, height=9, rand_init=False, is_four_room=False, num_colors=5, init_loc=(1,1), goal_locs=[(9,9)], gamma=0.99, slip_prob=0.00, name="color"):
     '''
     Args:
         height (int)
         width (int)
         init_loc (tuple: (int, int))
         goal_locs (list of tuples: [(int, int)...])
     '''
     self.num_colors = num_colors
     if is_four_room:
         walls = self._compute_walls(width, height)
     else:
         walls = []
     init_state = ColorState(init_loc[0], init_loc[1], color=random.randint(1, self.num_colors))
     GridWorldMDP.__init__(self, width, height, init_loc, rand_init=rand_init, init_state=init_state, goal_locs=goal_locs, walls=walls, gamma=gamma, slip_prob=slip_prob, name=str(self.num_colors) + name)
Example #8
def plan_with_vi(gamma=0.99):
    '''
    Args:
        gamma (float): discount factor

    Runs value iteration on the problem to check the correctness of the policy returned by BSS.
    '''
    mdp = GridWorldMDP(gamma=gamma, goal_locs=[(4, 3)], slip_prob=0.0)
    value_iter = ValueIteration(mdp, sample_rate=5)
    value_iter.run_vi()

    action_seq, state_seq = value_iter.plan(mdp.get_init_state())

    print "[ValueIteration] Plan for {}".format(mdp)
    for i in range(len(action_seq)):
        print 'pi({}) --> {}'.format(state_seq[i], action_seq[i])
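A hedged invocation sketch for plan_with_vi; it assumes only the function above plus the GridWorldMDP and ValueIteration imports already used in these examples.

if __name__ == "__main__":
    # Plan on the default grid world with a slightly lower discount and print the trajectory.
    plan_with_vi(gamma=0.95)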
Example #9
 def __init__(self, gamma=0.99, slip_prob=0.00, name="puddle", puddle_rects=[], goal_locs=[[1.0, 1.0]], is_goal_terminal=True, rand_init=False, step_cost=0.0):
     '''
     Args:
         gamma (float)
         slip_prob (float)
         name (str)
         puddle_rects (list of tuples): [(top_left_x, top_left_y, bot_right_x, bot_right_y), ...]
         is_goal_terminal (bool)
         rand_init (bool)
         step_cost (float)
     '''
     self.delta = 0.2 #0.05
     self.puddle_rects = puddle_rects
     GridWorldMDP.__init__(self, width=1.0, height=1.0, init_loc=[0.6, 0.0], goal_locs=goal_locs, gamma=gamma, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init, step_cost=step_cost)
     self.screen = pygame.display.set_mode((720,720))
     self.gamma = 0.9
Example #10
 def __init__(self, width=9, height=9, init_loc=(1,1), goal_locs=[(9,9)], lava_locs=[()], gamma=0.99, slip_prob=0.00, name="four_room", is_goal_terminal=True, rand_init=False, lava_cost=0.01, step_cost=0.0):
     '''
     Args:
         height (int)
         width (int)
         init_loc (tuple: (int, int))
         goal_locs (list of tuples: [(int, int)...])
         lava_locs (list of tuples)
         gamma (float)
         slip_prob (float)
         name (str)
         is_goal_terminal (bool)
         rand_init (bool)
         lava_cost (float)
         step_cost (float)
     '''
     GridWorldMDP.__init__(self, width, height, init_loc, goal_locs=goal_locs, lava_locs=lava_locs, walls=self._compute_walls(width, height), gamma=gamma, slip_prob=slip_prob, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init, lava_cost=lava_cost, step_cost=step_cost)
Example #11
    def __init__(self,
                 ltltask='F a',
                 ap_map={'a': (1, 1)},
                 width=5,
                 height=3,
                 init_loc=(1, 1),
                 rand_init=False,
                 goal_locs=[(5, 3)],
                 lava_locs=[()],
                 walls=[],
                 is_goal_terminal=True,
                 gamma=0.99,
                 init_state=None,
                 slip_prob=0.0,
                 step_cost=0.0,
                 lava_cost=0.01,
                 name="gridworld"):

        GridWorldMDP.__init__(self, width, height, init_loc, rand_init,
                              goal_locs, lava_locs, walls, is_goal_terminal,
                              gamma, init_state, slip_prob, step_cost,
                              lava_cost, name)

        self.ap_map = ap_map
        self.automata = LTLautomata(ltltask)  # construct automata
        self.init_q = self.automata.init_state
        # initialize the LTL-augmented start state (grid location plus automaton state)
        init_state = LTLGridWorldState(
            self.init_loc[0], self.init_loc[1], self.init_q
        ) if init_state is None or self.rand_init else self.init_state
        MDP.__init__(self,
                     LTLGridWorldMDP.ACTIONS,
                     self._transition_func,
                     self._reward_func,
                     init_state=init_state,
                     gamma=gamma)
        self.cur_state = init_state
Example #12
 def __init__(self,
              gamma=0.99,
              slip_prob=0.00,
              name="puddle",
              is_goal_terminal=True,
              rand_init=False):
     '''
     Args:
         gamma (float)
         slip_prob (float)
         name (str)
         is_goal_terminal (bool)
         rand_init (bool)
     '''
     self.delta = 0.01
     self.puddle_rects = [(0.1, 0.8, 0.5, 0.7), (0.4, 0.7, 0.5, 0.4)]
     GridWorldMDP.__init__(self,
                           width=1.0,
                           height=1.0,
                           init_loc=[0.25, 0.6],
                           goal_locs=[[1.0, 1.0]],
                           gamma=gamma,
                           name=name,
                           is_goal_terminal=is_goal_terminal,
                           rand_init=rand_init)
Example #13
    def _transition_func(self, state, action):
        '''
        Args:
            state (State)
            action (str)

        Returns:
            (State)
        '''

        gw_state = GridWorldState(state.x, state.y)

        next_gw_state = GridWorldMDP._transition_func(self, gw_state, action)

        # Add random color.
        rand_color = random.randint(1, self.num_colors)
        next_col_state = ColorState(next_gw_state.x, next_gw_state.y, rand_color)

        return next_col_state
class GridWorldGenerativeModel(object):
    '''
    BSS requires access to a generative model of the underlying MDP. This class is an example generative model for a
    grid world MDP. Given a state and action, the generative model randomly samples from the distribution over next
    states and returns it.
    '''
    def __init__(self, gamma=0.99):
        '''
        Args:
            gamma (float): discount factor of underlying MDP
        '''
        self.mdp = GridWorldMDP(gamma=gamma, goal_locs=[(4, 3)], slip_prob=0.0)
        self.actions = self.mdp.get_actions()
        self.num_actions = len(self.actions)
        self.init_state = self.mdp.init_state
        self.goal_locs = self.mdp.goal_locs

    def generate(self, state, action):
        '''
        Args:
            state (State): current state
            action (Action): action to take
        Returns:
            next_state (State): state randomly sampled from the distribution over next states
        '''
        return self.mdp.transition_func(state, action)

    def get_reward(self, state, action):
        return self.mdp.reward_func(state, action)

    def get_action_for_idx(self, action_idx):
        '''
        Args:
            action_idx (int): index corresponding to action

        Returns:
            action (str): "up", "down", etc.
        '''
        return self.mdp.ACTIONS[action_idx]

    def execute_action(self, action):
        return self.mdp.execute_agent_action(action)

    def reset_mdp(self):
        self.mdp.reset()
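To make the generative-model interface concrete, here is a minimal sampling sketch. It uses only the methods defined above plus Python's random module; the gamma value and rollout length are arbitrary choices.

import random

model = GridWorldGenerativeModel(gamma=0.95)
state = model.init_state
for _ in range(10):
    # BSS-style sampling: pick an action index, map it to an action string,
    # query the reward, and draw a successor from the transition distribution.
    action = model.get_action_for_idx(random.randint(0, model.num_actions - 1))
    reward = model.get_reward(state, action)
    state = model.generate(state, action)
    print(action, reward, state)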