def __init__(self, width=9, height=9, init_loc=(1, 1), goal_locs=None, gamma=0.99, slip_prob=0.00, name="four_room", is_goal_terminal=True, rand_init=False, step_cost=0.0):
    '''
    Four-room grid world: delegates to GridWorldMDP with walls computed
    by self._compute_walls(width, height).

    Args:
        width (int)
        height (int)
        init_loc (tuple: (int, int))
        goal_locs (list of tuples: [(int, int)...]): defaults to [(9, 9)]
        gamma (float)
        slip_prob (float)
        name (str)
        is_goal_terminal (bool)
        rand_init (bool)
        step_cost (float)
    '''
    # Avoid a shared mutable default argument: build the default per call.
    if goal_locs is None:
        goal_locs = [(9, 9)]
    GridWorldMDP.__init__(self, width, height, init_loc, goal_locs=goal_locs, walls=self._compute_walls(width, height), gamma=gamma, slip_prob=slip_prob, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init, step_cost=step_cost)
def __init__(self, gamma=0.99, slip_prob=0.00, name="puddle", puddle_rects=None, goal_locs=None, is_goal_terminal=True, rand_init=False, step_cost=0.0):
    '''
    Continuous puddle-world MDP on the unit square.

    Args:
        gamma (float)
        slip_prob (float)
        name (str)
        puddle_rects (list): [(top_left_x, top_left_y, bot_right_x, bot_right_y), ...]
        goal_locs (list of [x, y] pairs): defaults to [[1.0, 1.0]]
        is_goal_terminal (bool)
        rand_init (bool)
        step_cost (float)
    '''
    # Avoid shared mutable default arguments: create the defaults per call.
    if puddle_rects is None:
        puddle_rects = [(0.1, 0.8, 0.5, 0.7), (0.4, 0.7, 0.5, 0.4)]
    if goal_locs is None:
        goal_locs = [[1.0, 1.0]]
    self.delta = 0.05  # per-step movement magnitude
    self.puddle_rects = puddle_rects
    # Forward slip_prob — previously the parameter was accepted but never
    # passed to the parent, so it had no effect.
    GridWorldMDP.__init__(self, width=1.0, height=1.0, init_loc=[0.25, 0.6], goal_locs=goal_locs, gamma=gamma, slip_prob=slip_prob, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init, step_cost=step_cost)
def __init__(self, gamma=0.99, slip_prob=0.00, name="puddle", is_goal_terminal=True, rand_init=False):
    '''
    Minimal puddle-world MDP on the unit square with a fixed start and goal.

    Args:
        gamma (float)
        slip_prob (float)
        name (str)
        is_goal_terminal (bool)
        rand_init (bool)
    '''
    # goal_locs is a list of locations; sibling puddle constructors use
    # [[1.0, 1.0]] — the old flat [1.0, 1.0] was a single coordinate pair,
    # not a list of goals. slip_prob is now forwarded instead of ignored.
    GridWorldMDP.__init__(self, width=1.0, height=1.0, init_loc=[0.0, 0.0], goal_locs=[[1.0, 1.0]], gamma=gamma, slip_prob=slip_prob, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init)
def __init__(self, width=9, height=9, init_loc=(1, 1), goal_locs=None, gamma=0.99):
    '''
    Four-room grid world: delegates to GridWorldMDP with walls computed
    by self._compute_walls(width, height).

    Args:
        width (int)
        height (int)
        init_loc (tuple: (int, int))
        goal_locs (list of tuples: [(int, int)...]): defaults to [(9, 9)]
        gamma (float)
    '''
    # Avoid a shared mutable default argument: build the default per call.
    if goal_locs is None:
        goal_locs = [(9, 9)]
    GridWorldMDP.__init__(self, width, height, init_loc, goal_locs, walls=self._compute_walls(width, height), gamma=gamma)
def __init__(self, gamma=0.99):
    '''
    Build the wrapped GridWorldMDP and cache the pieces BSS queries.

    Args:
        gamma (float): discount factor of underlying MDP
    '''
    # Fixed 4x3-style grid with a single goal at (4, 3) and deterministic
    # transitions (slip_prob=0.0).
    self.mdp = GridWorldMDP(gamma=gamma, goal_locs=[(4, 3)], slip_prob=0.0)
    # Cached views of the underlying MDP, exposed as plain attributes.
    self.actions = self.mdp.get_actions()
    self.num_actions = len(self.actions)
    self.init_state = self.mdp.init_state
    self.goal_locs = self.mdp.goal_locs
def __init__(self, gamma=0.99, slip_prob=0.00, name="puddle", puddle_rects=None, goal_locs=None, is_goal_terminal=True, rand_init=False, step_cost=0.0):
    '''
    Continuous puddle-world MDP on the unit square.

    Args:
        gamma (float)
        slip_prob (float)
        name (str)
        puddle_rects (list): [(top_left_x, top_left_y, bot_right_x, bot_right_y), ...]
        goal_locs (list of [x, y] pairs): defaults to [[1.0, 1.0]]
        is_goal_terminal (bool)
        rand_init (bool)
        step_cost (float)
    '''
    # Avoid shared mutable default arguments: create the defaults per call.
    if puddle_rects is None:
        puddle_rects = [(0.1, 0.8, 0.5, 0.7), (0.4, 0.7, 0.5, 0.4)]
    if goal_locs is None:
        goal_locs = [[1.0, 1.0]]
    self.delta = 0.05  # per-step movement magnitude
    self.puddle_rects = puddle_rects
    # Forward slip_prob — previously the parameter was accepted but never
    # passed to the parent, so it had no effect.
    GridWorldMDP.__init__(self, width=1.0, height=1.0, init_loc=[0.25, 0.6], goal_locs=goal_locs, gamma=gamma, slip_prob=slip_prob, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init, step_cost=step_cost)
def __init__(self, width=9, height=9, rand_init=False, is_four_room=False, num_colors=5, init_loc=(1, 1), goal_locs=None, gamma=0.99, slip_prob=0.00, name="color"):
    '''
    Grid world whose states additionally carry a random color in
    [1, num_colors]; optionally laid out as four rooms.

    Args:
        width (int)
        height (int)
        rand_init (bool)
        is_four_room (bool): if True, add the four-room wall layout
        num_colors (int)
        init_loc (tuple: (int, int))
        goal_locs (list of tuples: [(int, int)...]): defaults to [(9, 9)]
        gamma (float)
        slip_prob (float)
        name (str): prefixed with num_colors to keep names distinct
    '''
    # Avoid a shared mutable default argument: build the default per call.
    if goal_locs is None:
        goal_locs = [(9, 9)]
    self.num_colors = num_colors
    walls = self._compute_walls(width, height) if is_four_room else []
    # Initial state gets a uniformly random color.
    init_state = ColorState(init_loc[0], init_loc[1], color=random.randint(1, self.num_colors))
    GridWorldMDP.__init__(self, width, height, init_loc, rand_init=rand_init, init_state=init_state, goal_locs=goal_locs, walls=walls, gamma=gamma, slip_prob=slip_prob, name=str(self.num_colors) + name)
def plan_with_vi(gamma=0.99):
    '''
    Run value iteration on the problem to sanity-check the policy
    returned by BSS, printing the resulting plan.

    Args:
        gamma (float): discount factor
    '''
    mdp = GridWorldMDP(gamma=gamma, goal_locs=[(4, 3)], slip_prob=0.0)
    value_iter = ValueIteration(mdp, sample_rate=5)
    value_iter.run_vi()
    action_seq, state_seq = value_iter.plan(mdp.get_init_state())
    # Python 3 print function (the old Python-2 print statements were a
    # syntax error under Python 3); zip pairs each state with its action.
    print("[ValueIteration] Plan for {}".format(mdp))
    for state, action in zip(state_seq, action_seq):
        print('pi({}) --> {}'.format(state, action))
def __init__(self, gamma=0.99, slip_prob=0.00, name="puddle", puddle_rects=None, goal_locs=None, is_goal_terminal=True, rand_init=False, step_cost=0.0):
    '''
    Puddle-world MDP on the unit square with a pygame display surface.

    Args:
        gamma (float)
        slip_prob (float)
        name (str)
        puddle_rects (list): [(top_left_x, top_left_y, bot_right_x, bot_right_y), ...]; defaults to []
        goal_locs (list of [x, y] pairs): defaults to [[1.0, 1.0]]
        is_goal_terminal (bool)
        rand_init (bool)
        step_cost (float)
    '''
    # Avoid shared mutable default arguments: create the defaults per call.
    if puddle_rects is None:
        puddle_rects = []
    if goal_locs is None:
        goal_locs = [[1.0, 1.0]]
    self.delta = 0.2  # per-step movement magnitude (was 0.05 earlier)
    self.puddle_rects = puddle_rects
    # Forward slip_prob — previously the parameter was accepted but never
    # passed to the parent, so it had no effect.
    GridWorldMDP.__init__(self, width=1.0, height=1.0, init_loc=[0.6, 0.0], goal_locs=goal_locs, gamma=gamma, slip_prob=slip_prob, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init, step_cost=step_cost)
    # Rendering surface for visualization.
    self.screen = pygame.display.set_mode((720, 720))
    # NOTE(review): this overrides the gamma passed to the parent above with
    # a hard-coded 0.9 — looks unintentional; confirm before removing.
    self.gamma = 0.9
def __init__(self, width=9, height=9, init_loc=(1, 1), goal_locs=None, lava_locs=None, gamma=0.99, slip_prob=0.00, name="four_room", is_goal_terminal=True, rand_init=False, lava_cost=0.01, step_cost=0.0):
    '''
    Four-room grid world with lava cells: delegates to GridWorldMDP with
    walls computed by self._compute_walls(width, height).

    Args:
        width (int)
        height (int)
        init_loc (tuple: (int, int))
        goal_locs (list of tuples: [(int, int)...]): defaults to [(9, 9)]
        lava_locs (list of tuples): defaults to [()]
        gamma (float)
        slip_prob (float)
        name (str)
        is_goal_terminal (bool)
        rand_init (bool)
        lava_cost (float)
        step_cost (float)
    '''
    # Avoid shared mutable default arguments: create the defaults per call.
    if goal_locs is None:
        goal_locs = [(9, 9)]
    if lava_locs is None:
        # Preserved original default of a single empty tuple — presumably
        # treated as "no lava" downstream; confirm against GridWorldMDP.
        lava_locs = [()]
    GridWorldMDP.__init__(self, width, height, init_loc, goal_locs=goal_locs, lava_locs=lava_locs, walls=self._compute_walls(width, height), gamma=gamma, slip_prob=slip_prob, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init, lava_cost=lava_cost, step_cost=step_cost)
def __init__(self, ltltask='F a', ap_map=None, width=5, height=3, init_loc=(1, 1), rand_init=False, goal_locs=None, lava_locs=None, walls=None, is_goal_terminal=True, gamma=0.99, init_state=None, slip_prob=0.0, step_cost=0.0, lava_cost=0.01, name="gridworld"):
    '''
    Grid world whose states are augmented with the state of an LTL
    automaton built from `ltltask`.

    Args:
        ltltask (str): LTL formula, e.g. 'F a'
        ap_map (dict): atomic proposition -> grid cell; defaults to {'a': (1, 1)}
        width (int)
        height (int)
        init_loc (tuple: (int, int))
        rand_init (bool)
        goal_locs (list of tuples): defaults to [(5, 3)]
        lava_locs (list of tuples): defaults to [()]
        walls (list): defaults to []
        is_goal_terminal (bool)
        gamma (float)
        init_state: explicit initial state, or None to build one
        slip_prob (float)
        step_cost (float)
        lava_cost (float)
        name (str)
    '''
    # Avoid shared mutable default arguments: create the defaults per call.
    if ap_map is None:
        ap_map = {'a': (1, 1)}
    if goal_locs is None:
        goal_locs = [(5, 3)]
    if lava_locs is None:
        lava_locs = [()]
    if walls is None:
        walls = []
    GridWorldMDP.__init__(self, width, height, init_loc, rand_init, goal_locs, lava_locs, walls, is_goal_terminal, gamma, init_state, slip_prob, step_cost, lava_cost, name)
    self.ap_map = ap_map
    self.automata = LTLautomata(ltltask)  # construct automata
    self.init_q = self.automata.init_state
    # Initialize the product state (grid position + automaton state);
    # reuse the parent's init_state unless one must be synthesized.
    init_state = LTLGridWorldState(self.init_loc[0], self.init_loc[1], self.init_q) if init_state is None or self.rand_init else self.init_state
    # Re-run MDP.__init__ so transition/reward functions and the initial
    # state refer to the LTL-augmented versions.
    MDP.__init__(self, LTLGridWorldMDP.ACTIONS, self._transition_func, self._reward_func, init_state=init_state, gamma=gamma)
    self.cur_state = init_state
def __init__(self, gamma=0.99, slip_prob=0.00, name="puddle", is_goal_terminal=True, rand_init=False):
    '''
    Puddle-world MDP on the unit square with two fixed puddle rectangles
    and a single goal at [1.0, 1.0].

    Args:
        gamma (float)
        slip_prob (float)
        name (str)
        is_goal_terminal (bool)
        rand_init (bool)
    '''
    self.delta = 0.01  # per-step movement magnitude
    # Fixed layout: (top_left_x, top_left_y, bot_right_x, bot_right_y) per rect.
    self.puddle_rects = [(0.1, 0.8, 0.5, 0.7), (0.4, 0.7, 0.5, 0.4)]
    # Forward slip_prob — previously the parameter was accepted but never
    # passed to the parent, so it had no effect.
    GridWorldMDP.__init__(self, width=1.0, height=1.0, init_loc=[0.25, 0.6], goal_locs=[[1.0, 1.0]], gamma=gamma, slip_prob=slip_prob, name=name, is_goal_terminal=is_goal_terminal, rand_init=rand_init)
def _transition_func(self, state, action):
    '''
    Apply the underlying grid-world transition, then recolor the
    resulting state uniformly at random.

    Args:
        state (State)
        action (str)

    Returns:
        (State)
    '''
    # Strip the color off so the base class sees a plain grid state.
    plain_state = GridWorldState(state.x, state.y)
    moved = GridWorldMDP._transition_func(self, plain_state, action)
    # Reattach a fresh random color in [1, num_colors].
    return ColorState(moved.x, moved.y, random.randint(1, self.num_colors))
class GridWorldGenerativeModel(object):
    '''
    Example generative model over a grid-world MDP for use with BSS.

    BSS needs sample access to the underlying MDP: given a state and an
    action, the model draws a next state from the transition distribution
    and reports the associated reward.
    '''

    def __init__(self, gamma=0.99):
        '''
        Args:
            gamma (float): discount factor of underlying MDP
        '''
        mdp = GridWorldMDP(gamma=gamma, goal_locs=[(4, 3)], slip_prob=0.0)
        self.mdp = mdp
        # Cache the pieces BSS queries directly.
        self.actions = mdp.get_actions()
        self.num_actions = len(self.actions)
        self.init_state = mdp.init_state
        self.goal_locs = mdp.goal_locs

    def generate(self, state, action):
        '''
        Args:
            state (State): current state
            action (Action): action to take

        Returns:
            next_state (State): state randomly sampled from the
                distribution over next states
        '''
        return self.mdp.transition_func(state, action)

    def get_reward(self, state, action):
        '''Return the reward for taking `action` in `state`.'''
        return self.mdp.reward_func(state, action)

    def get_action_for_idx(self, action_idx):
        '''
        Args:
            action_idx (int): index corresponding to action

        Returns:
            action (str): "up", "down" etc
        '''
        return self.mdp.ACTIONS[action_idx]

    def execute_action(self, action):
        '''Advance the wrapped MDP by one step and return its result.'''
        return self.mdp.execute_agent_action(action)

    def reset_mdp(self):
        '''Reset the wrapped MDP to its initial state.'''
        self.mdp.reset()