def __init__(self, starting_room=1, gamma=0.99, slip_prob=0.0, env_file=[], constraints={}, ap_maps={}):  # TODO: work
    initial_state = CubeL1State(starting_room, 0)
    #self.goal_state = CubeL1State(goal_room, 0, is_terminal=True)
    #self.terminal_func = lambda state: state._is_terminal  # == 1
    self.constraints = constraints
    self.ap_maps = ap_maps
    self.slip_prob = slip_prob

    if len(env_file) != 0:
        self.cube_env = env_file[0]
        CubeL1MDP.ACTIONS = self.cube_env['L1ACTIONS']
    else:
        print("Input: env_file")

    MDP.__init__(self, CubeL1MDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=initial_state, gamma=gamma)
def __init__(self, graph, goals={}, start_state={}, is_goal_terminal=True, gamma=0.99, init_state=None,
             slip_prob=0.0, step_cost=0.0, name="gridworld"):
    '''
    Args:
        graph (dict): Adjacency structure; its keys double as the action set.
        goals (dict)
        start_state (dict)
        is_goal_terminal (bool)
        gamma (float)
    '''
    self.graph = graph
    self.goals = goals
    MDP.__init__(self, list(self.graph.keys()), self._transition_func, self._reward_func,
                 init_state=GraphWorldState(state=0, goals=goals), gamma=gamma)
    self.step_cost = step_cost
    self.cur_state = GraphWorldState(start_state, goals)
    self.is_goal_terminal = is_goal_terminal
    self.name = name
def __init__(self, width=8, height=7, init_loc=(1, 1), rocks=None, gamma=0.99, slip_prob=0.00,
             rock_rewards=[0.1, 1, 20], name="rocksample"):
    '''
    Args:
        height (int)
        width (int)
        init_loc (tuple: (int, int))
        rocks (list of [x, y, is_good] triples)
    '''
    if rocks is None:
        rocks = [[1, 2, True], [5, 4, True], [6, 7, True], [1, 3, True],
                 [4, 5, True], [2, 7, False], [2, 2, True], [7, 4, False]]
    self.init_loc = init_loc
    self.init_rocks = rocks
    self.rock_rewards = rock_rewards
    self.name = name + "-" + str(len(rocks))
    self.width = width
    self.height = height
    MDP.__init__(self, RockSampleMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=self.get_init_state(), gamma=gamma)
def __init__(self, width=8, height=7, init_loc=(1, 1), rocks=None, gamma=0.99, slip_prob=0.00,
             rock_reward=10, name="rocksample"):
    '''
    Args:
        height (int)
        width (int)
        init_loc (tuple: (int, int))
        rocks (list of [x, y, is_good] triples)
    '''
    # Default inside the body to avoid a shared mutable default for @rocks.
    if rocks is None:
        rocks = [[1, 2, True], [3, 1, False], [4, 2, True], [3, 5, False],
                 [4, 5, True], [2, 7, False], [6, 6, True], [7, 4, False]]
    self.init_loc = init_loc
    self.rocks = rocks
    self.rock_reward = rock_reward
    self._update_actions()
    self.name = name + "-" + str(self.rock_reward)
    self.width = width
    self.height = height
    MDP.__init__(self, RockSampleMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=self.get_init_state(), gamma=gamma)
def __init__(self, init_mean=(-0.2, -0.2), control_cost=False, dense_reward=False, render=False):
    xml = os.path.join(os.path.expanduser("~"), "git-repos/dm_control/dm_control/suite/point_mass.xml")
    model = load_model_from_path(xml)
    self.sim = MjSim(model)
    self.render = render
    self.init_mean = init_mean
    self.control_cost = control_cost
    self.dense_reward = dense_reward

    if self.render:
        self.viewer = MjViewer(self.sim)

    # Config
    self.env_name = "Point-Mass-Environment"
    self.target_position = np.array([0., 0.])
    self.target_tolerance = 0.02
    self.init_noise = 0.05

    self._initialize_mujoco_state()
    self.init_state = self.get_state()

    print("Loaded {} with dense_reward={}".format(self.env_name, self.dense_reward))

    MDP.__init__(self, [0, 1], self._transition_func, self._reward_func, self.init_state)
def __init__(self, starting_room=1, gamma=0.99, slip_prob=0.0, env_file=[], constraints={}, ap_maps={},
             automata=[], init_state=[]):  # TODO: work
    #self.terminal_func = lambda state: state._is_terminal
    self.constraints = constraints
    self.ap_maps = ap_maps
    self.slip_prob = slip_prob
    self.automata = automata

    if len(env_file) != 0:
        self.cube_env = env_file[0]
        CubeL1MDP.ACTIONS = self.cube_env['L1ACTIONS']
    else:
        print("Input: env_file")

    # initial_state = CubeL1State(starting_room, self._transition_q(starting_room, ""))
    # if initial_state.q != 0:
    #     initial_state.set_terminal(True)

    MDP.__init__(self, CubeL1MDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=init_state, gamma=gamma)
def __init__(self, gravity=9.8, masscart=1.0, masspole=0.1, length=.5, gamma=0.99, tau=.02,
             init_state_params=None, name="Cart-Pendulum"):
    if init_state_params is None:
        init_state = CartPoleState(x=0, x_dot=0, theta=0, theta_dot=0)
    else:
        init_state = CartPoleState(x=init_state_params["x"], x_dot=init_state_params["x_dot"],
                                   theta=init_state_params["theta"], theta_dot=init_state_params["theta_dot"])

    MDP.__init__(self, CartPoleMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=init_state, gamma=gamma)

    # From parameters.
    self.gravity = gravity
    self.masscart = masscart
    self.masspole = masspole
    self.length = length
    self.gamma = gamma
    self.tau = tau
    self.name = name

    # Thresholds.
    self.x_threshold = 2.4  # abs val of limit of x position of cart
    self.theta_threshold = self._degrees_to_radians(20)  # angle away from vertical before being considered terminal

    # Computed.
    self.total_mass = self.masscart + self.masspole
    self.polemass_length = self.masspole * self.length
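# Usage sketch (assumes the CartPoleMDP class above with CartPoleState in
# scope): start the pole with a small initial tilt by passing explicit
# state parameters.
#
#     mdp = CartPoleMDP(init_state_params={"x": 0.0, "x_dot": 0.0,
#                                          "theta": 0.1, "theta_dot": 0.0})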
def __init__(self, starting_floor=1, gamma=0.99, env_file=[], constraints={}, ap_maps={}):
    self.terminal_func = lambda state: state.q != 0
    self.constraints = constraints
    self.ap_maps = ap_maps

    if len(env_file) != 0:
        self.cube_env = env_file[0]
        CubeL2MDP.ACTIONS = self.cube_env['L2ACTIONS']
    else:
        print("Input: env_file")

    initial_state = CubeL2State(starting_floor, self._transition_q(starting_floor, ""))
    if initial_state.q != 0:
        initial_state.set_terminal(True)

    MDP.__init__(self, CubeL2MDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=initial_state, gamma=gamma)
def __init__(self, task, init_loc=(0, 0), blocks=[], rooms=[], doors=[], rand_init=False, gamma=0.99,
             init_state=None):
    '''
    :param task: The given CleanUpTask for this MDP
    :param init_loc: Initial agent location
    :param blocks: List of blocks
    :param rooms: List of rooms
    :param doors: List of doors
    :param rand_init: random initialization boolean
    :param gamma: gamma factor
    :param init_state: Initial state if given
    '''
    from simple_rl.tasks.cleanup.cleanup_state import CleanUpState
    self.task = task
    if rand_init:
        # Collect each block's (x, y) so the random start never lands on a block.
        block_loc = [(block.x, block.y) for block in blocks]
        init_loc = random.choice([(x, y) for room in rooms for (x, y) in room.points_in_room
                                  if (x, y) not in block_loc])
    init_state = CleanUpState(task, init_loc[0], init_loc[1], blocks=blocks, doors=doors, rooms=rooms) \
        if init_state is None or rand_init else init_state
    self.cur_state = init_state
    MDP.__init__(self, self.ACTIONS, self._transition_func, self._reward_func,
                 init_state=init_state, gamma=gamma)

    # The following lines are used for efficiency.
    legal_states = [(x, y) for room in rooms for x, y in room.points_in_room]
    legal_states.extend([(door.x, door.y) for door in doors])
    self.legal_states = set(legal_states)
    self.door_locs = set([(door.x, door.y) for door in doors])
    self.width = max(self.legal_states, key=lambda tup: tup[0])[0] + 1
    self.height = max(self.legal_states, key=lambda tup: tup[1])[1] + 1
def __init__(self, num_states=5, num_rand_trans=5, num_actions=3, gamma=0.99):
    '''
    Args:
        num_states (int) [optional]: Number of states in the Random MDP.
        num_rand_trans (int) [optional]: Number of possible next states.

    Summary:
        Each state-action pair picks @num_rand_trans possible states and has a
        uniform distribution over them for transitions. Rewards are also chosen
        randomly.
    '''
    RandomMDP.ACTIONS = [str(i) for i in range(num_actions)]
    MDP.__init__(self, RandomMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=RandomState(1), gamma=gamma)
    # assert(num_rand_trans <= num_states)
    self.num_rand_trans = num_rand_trans
    self.num_states = num_states
    self._reward_s_a = (random.choice(range(self.num_states)), random.choice(RandomMDP.ACTIONS))
    self._transitions = defaultdict(lambda: defaultdict(str))
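# Usage sketch (assumes the RandomMDP class above): a 5-state MDP whose
# transitions, and the single rewarding (state, action) pair, are drawn at
# random at construction time.
#
#     mdp = RandomMDP(num_states=5, num_rand_trans=5, num_actions=3)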
def __init__(self, combo, num_actions=3, num_states=None, reset_val=0.01, gamma=0.99):
    '''
    Args:
        num_states (int) [optional]: Number of states in the chain.
    '''
    ComboLockMDP.ACTIONS = [str(i) for i in range(1, num_actions + 1)]
    self.num_states = len(combo) if num_states is None else num_states
    self.num_actions = num_actions
    self.combo = combo

    if len(combo) != self.num_states:
        raise ValueError("(simple_rl.ComboLockMDP Error): Combo length (" + str(len(combo)) +
                         ") must be the same as num_states (" + str(self.num_states) + ").")
    elif max(combo) > num_actions:
        raise ValueError("(simple_rl.ComboLockMDP Error): Combo (" + str(combo) +
                         ") must only contain values less than or equal to @num_actions (" +
                         str(num_actions) + ").")

    MDP.__init__(self, ComboLockMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=ChainState(1), gamma=gamma)
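# Usage sketch (assumes the ComboLockMDP class above): actions are the
# 1-based strings '1'..str(num_actions), so every combo entry must be
# <= num_actions or the constructor raises ValueError.
#
#     mdp = ComboLockMDP(combo=[3, 1, 2], num_actions=3)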
def __init__(self, starting_room=1, goal_room=4, gamma=0.99):
    initial_state = FourRoomL1State(starting_room)
    self.goal_state = FourRoomL1State(goal_room, is_terminal=True)
    self.terminal_func = lambda state: state == self.goal_state
    MDP.__init__(self, FourRoomL1MDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=initial_state, gamma=gamma)
def __init__(self, torque_multiplier=50., init_mean=(-0.2, -0.2), render=False):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    path = os.path.join(dir_path, 'asset/point_mass.xml')
    model = load_model_from_path(path)
    self.sim = MjSim(model)
    self.render = render
    self.init_mean = init_mean
    self.viewer = MjViewer(self.sim)

    # Config
    self.env_name = "Point-Mass-Environment"
    self.target_position = np.array([0., 0.])
    self.target_tolerance = 0.01
    self.init_noise = 0.05
    self.max_absolute_torque = 5.
    self.torque_multiplier = torque_multiplier

    self._initialize_mujoco_state()
    self.init_state = self.get_state()

    MDP.__init__(self, [0, 1], self._transition_func, self._reward_func, self.init_state)
def __init__(self, width, height, tee_loc, hole_loc, rough_locs, hazard_locs, wind_dir, wind_spd,
             gamma, step_cost, name):
    '''
    In the rough, all driver shots are converted to iron shots and a failure chance exists.
    In a hazard, only putting is allowed and a greater failure chance exists.
    wind_dir is one of: up, down, left, right.
    wind_spd shifts driver shots by .2*speed and iron shots by .1*speed on average.
    '''
    self.width = width
    self.height = height
    self.tee_loc = tee_loc
    self.hole_loc = hole_loc
    self.rough_locs = rough_locs
    self.hazard_locs = hazard_locs
    self.wind_dir = wind_dir
    self.wind_spd = wind_spd
    self.gamma = gamma
    self.step_cost = step_cost
    self.name = name
    self.new_x = 0
    self.new_y = 0
    self.init_state = GolfWorldState(self.tee_loc[0], self.tee_loc[1])
    MDP.__init__(self, GolfWorldMDP.ACTIONS, self._transition_func, self._reward_func,
                 self.init_state, self.gamma, self.step_cost)
def __init__(self, noise=0., episode_length=1000, reward_scale=1000., cfg="pinball_empty.cfg", render=False):
    # default_config_dir = os.path.join(__rlpy_location__, "Domains", "PinballConfigs")
    default_config_dir = os.path.dirname(__file__)
    self.cfg = cfg
    self.domain = Pinball(noise=noise, episodeCap=episode_length,
                          configuration=os.path.join(default_config_dir, "PinballConfigs", self.cfg))
    self.render = render
    self.reward_scale = reward_scale

    # Each observation from domain.step(action) is a tuple of the form
    # (reward, next_state, is_term, possible_actions); s0() returns the
    # initial state, is_terminal, and possible_actions.
    init_observation = self.domain.s0()
    init_state = tuple(init_observation[0])

    actions = self.domain.actions
    MDP.__init__(self, actions, self._transition_func, self._reward_func,
                 init_state=PinballState(*init_state))
def __init__(self, seed, dense_reward=False, render=False, color_str=""):
    self.env_name = "point_maze"
    self.seed = seed
    self.dense_reward = dense_reward
    self.render = render

    # Set random seed.
    random.seed(seed)
    np.random.seed(seed)

    # Configure env.
    gym_mujoco_kwargs = {
        'maze_id': 'Maze',
        'n_bins': 0,
        'observe_blocks': False,
        'put_spin_near_agent': False,
        'top_down_view': False,
        'manual_collision': True,
        'maze_size_scaling': 4,
        'color_str': color_str
    }
    self.env = PointMazeEnv(**gym_mujoco_kwargs)
    self.goal_position = self.env.goal_xy
    self.reset()

    MDP.__init__(self, [1, 2], self._transition_func, self._reward_func, self.init_state)
def __init__(self, gravity=9.8, masscart=1.0, masspole=0.1, length=.5, gamma=0.99, tau=.02,
             name="Cart-Pendulum"):
    init_state = CartPoleState(x=0, x_dot=0, theta=0, theta_dot=0)
    MDP.__init__(self, CartPoleMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=init_state, gamma=gamma)

    # From parameters.
    self.gravity = gravity
    self.masscart = masscart
    self.masspole = masspole
    self.length = length
    self.gamma = gamma
    self.tau = tau
    self.name = name

    # Thresholds.
    self.x_threshold = 2.4  # abs val of limit of x position of cart
    self.theta_threshold = self._degrees_to_radians(20)  # angle away from vertical before being considered terminal

    # Computed.
    self.total_mass = self.masscart + self.masspole
    self.polemass_length = self.masspole * self.length
def __init__(self, num_pegs=3, num_discs=3, gamma=0.95):
    '''
    Args:
        num_pegs (int)
        num_discs (int)
        gamma (float)
    '''
    self.num_pegs = num_pegs
    self.num_discs = num_discs
    HanoiMDP.ACTIONS = [str(x) + str(y) for x, y in
                        itertools.product(range(self.num_pegs), range(self.num_pegs)) if x != y]

    # Setup init state.
    init_state = [" " for _ in range(num_pegs)]
    x = ""
    for i in range(num_discs):
        x += chr(97 + i)
    init_state[0] = x
    init_state = State(data=init_state)

    MDP.__init__(self, HanoiMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=init_state, gamma=gamma)
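# Note on the encoding above: each action is a two-character string "xy",
# read as "move the top disc from peg x to peg y" (x != y), and the start
# state stacks discs "a", "b", ... on peg 0. Usage sketch (assumes the
# HanoiMDP class above):
#
#     mdp = HanoiMDP(num_pegs=3, num_discs=3)
#     print(mdp.get_actions())  # ['01', '02', '10', '12', '20', '21']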
def __init__(self, ns, r):
    self.nS = ns
    self.nA = 2
    self.states = range(self.nS)
    self.actions = range(self.nA)
    self.T = self.generate_transition_matrix()
    self.R = r
    # MDP.__init__ also expects a reward function; self._reward_func is
    # assumed to be defined elsewhere on this class (reading self.R).
    MDP.__init__(self, self.actions, self._transition_func, self._reward_func, init_state=0)
def __init__(self, num_states=5, reset_val=0.01, gamma=0.99):
    '''
    Args:
        num_states (int) [optional]: Number of states in the chain.
    '''
    MDP.__init__(self, ChainMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=ChainState(1), gamma=gamma)
    self.num_states = num_states
    self.reset_val = reset_val
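# Usage sketch (assumes the ChainMDP class above): a 10-state chain that
# starts in ChainState(1).
#
#     mdp = ChainMDP(num_states=10, reset_val=0.01, gamma=0.95)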
def __init__(self, env_name='CartPole-v0', render=False):
    '''
    Args:
        env_name (str)
    '''
    self.env_name = env_name
    self.env = gym.make(env_name)
    self.render = render
    MDP.__init__(self, range(self.env.action_space.n), self._transition_func, self._reward_func,
                 init_state=GymState(self.env.reset()))
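# Usage sketch (assumes the GymMDP class above plus a local gym install):
#
#     mdp = GymMDP(env_name='CartPole-v0', render=False)
#     print(mdp.get_actions())  # range over the env's discrete action space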
def __init__(self, starting_room=1, goal_room=4, starting_items=[], goal_items=[], gamma=0.99,
             actions=None, doors=[], rooms=[]):
    initial_state = FourRoomL1State(starting_room, items=starting_items)
    self.goal_state = FourRoomL1State(goal_room, is_terminal=True, items=goal_items)
    self.terminal_func = lambda state: state == self.goal_state
    self.doors = doors
    self.rooms = rooms
    MDP.__init__(self, actions, self._transition_func, self._reward_func,
                 init_state=initial_state, gamma=gamma)
def __init__(self, actions, transition_func, reward_func, init_state, gamma=0.99, step_cost=0, str='OMDP'):
    # Note: the parameter name `str` shadows the builtin; it is stored verbatim on the instance.
    MDP.__init__(self, actions, transition_func, reward_func, init_state, gamma, step_cost)
    self.str = str
def __init__(self, actions, transition_func, reward_func, init_state, gamma=0.99):
    MDP.__init__(self, actions, transition_func, reward_func, init_state=init_state, gamma=gamma)
def __init__(self, width=5, height=3, init_loc=(1, 1), rand_init=False, goal_locs=[()], lava_locs=[()],
             walls=[], is_goal_terminal=True, is_lava_terminal=False, gamma=0.99, slip_prob=0.0,
             step_cost=0.0, lava_cost=1.0, name="gridworld"):
    '''
    Args:
        height (int)
        width (int)
        init_loc (tuple: (int, int))
        goal_locs (list of tuples: [(int, int)...])
        lava_locs (list of tuples: [(int, int)...]): These locations return -1 reward.
        walls (list)
        is_goal_terminal (bool)
    '''
    # Setup init location.
    self.rand_init = rand_init
    if rand_init:
        init_loc = random.randint(1, width), random.randint(1, height)
        while init_loc in walls:
            init_loc = random.randint(1, width), random.randint(1, height)
    self.init_loc = init_loc
    init_state = GridWorldState(init_loc[0], init_loc[1])

    MDP.__init__(self, GridWorldMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=init_state, gamma=gamma)

    if type(goal_locs) is not list:
        raise ValueError("(simple_rl) GridWorld Error: argument @goal_locs needs to be a list of locations. For example: [(3,3), (4,3)].")

    self.step_cost = step_cost
    self.lava_cost = lava_cost
    self.walls = walls
    self.width = width
    self.height = height
    self.goal_locs = goal_locs
    self.cur_state = GridWorldState(init_loc[0], init_loc[1])
    self.is_goal_terminal = is_goal_terminal
    self.is_lava_terminal = is_lava_terminal
    self.slip_prob = slip_prob
    self.name = name
    self.lava_locs = lava_locs
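# Usage sketch (assumes the GridWorldMDP class above): a small slippery grid
# with a single terminal goal.
#
#     mdp = GridWorldMDP(width=4, height=3, init_loc=(1, 1),
#                        goal_locs=[(4, 3)], slip_prob=0.05)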
def __init__(self, num_arms=10, distr_family=np.random.normal, distr_params=None):
    '''
    Args:
        num_arms (int): Number of arms.
        distr_family (lambda): A function from numpy which, when given entities
            from @distr_params, samples from the distribution family.
        distr_params (dict): If None is given, default mu/sigma for the normal
            distribution are initialized randomly.
    '''
    BanditMDP.ACTIONS = [str(i) for i in range(1, num_arms + 1)]
    MDP.__init__(self, BanditMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=State(1), gamma=1.0)
    self.num_arms = num_arms
    self.distr_family = distr_family
    self.distr_params = self.init_distr_params() if distr_params is None else distr_params
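# Usage sketch (assumes the BanditMDP class above): a 10-armed bandit whose
# arm distributions default to randomly initialized normals; gamma is pinned
# to 1.0 in the constructor since pulls are single-step.
#
#     mdp = BanditMDP(num_arms=10)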
def __init__(self, l0_domain):
    '''
    Args:
        l0_domain (CleanUpMDP)
    '''
    self.l0_domain = l0_domain
    state_mapper = AbstractCleanupL1StateMapper(l0_domain)
    l1_init_state = state_mapper.map_state(l0_domain.init_state)
    grounded_actions = CleanupL1MDP.ground_actions(l1_init_state)
    self.terminal_func = self._is_goal_state
    MDP.__init__(self, grounded_actions, self._transition_function, self._reward_function, l1_init_state)
def __init__(self, actions, transition_func, reward_func, init_state, gamma=0.99, num_agents=2):
    MDP.__init__(self, actions, transition_func, reward_func, init_state=init_state, gamma=gamma)
    self.num_agents = num_agents
def __init__(self, env_name='Reacher-v2', render=False):
    '''
    Args:
        env_name (str)
    '''
    self.env_name = env_name
    self.env = NormalizedEnv(gym.make(env_name))
    self.render = render
    MDP.__init__(self, range(self.env.action_space.shape[0]), self._transition_func, self._reward_func,
                 init_state=FixedReacherState(self.env.reset()))
def __init__(self, env_name='CartPole-v0', render=False, render_every_n_episodes=0):
    '''
    Args:
        env_name (str)
        render (bool): If True, renders the screen every time step.
        render_every_n_episodes (int): @render must be True; then renders the
            screen every n episodes.
    '''
    # self.render_every_n_steps = render_every_n_steps
    self.render_every_n_episodes = render_every_n_episodes
    self.episode = 0
    self.env_name = env_name
    self.env = gym.make(env_name)
    self.render = render
    MDP.__init__(self, range(self.env.action_space.n), self._transition_func, self._reward_func,
                 init_state=GymState(self.env.reset()))
def __init__(self, width=5, height=3, init_loc=(1, 1), goal_locs=[(5, 3)], num_colors=3,
             is_goal_terminal=True, gamma=0.99, init_state=None, slip_prob=0.0, rand_init=False,
             name="color"):
    '''
    Args:
        height (int)
        width (int)
        init_loc (tuple: (int, int))
        goal_locs (list of tuples: [(int, int)...])
        num_colors (int)
        rand_init (bool)
    '''
    ColorMDP.COLOR_MAP = range(num_colors)

    # Setup init location. (@rand_init is assumed to default to False; the
    # body below referenced it without it appearing in the signature.)
    self.init_loc = init_loc
    init_state = ColorState(init_loc[0], init_loc[1], ColorMDP.COLOR_MAP[0]) \
        if init_state is None or rand_init else init_state

    MDP.__init__(self, ColorMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=init_state, gamma=gamma)

    if type(goal_locs) is not list:
        print("(simple_rl) color Error: argument @goal_locs needs to be a list of locations. For example: [(3,3), (4,3)].")
        quit()

    self.width = width
    self.height = height
    self.walls = self._compute_walls()
    self.goal_locs = goal_locs
    self.cur_state = ColorState(init_loc[0], init_loc[1], ColorMDP.COLOR_MAP[0])
    self.is_goal_terminal = is_goal_terminal
    self.slip_prob = slip_prob
    self.name = name
def __init__(self, init_board=None, name="2048"):
    '''
    Initializer that creates the MDP and assigns the other fields according
    to the 2048 game.
    '''
    self.step_cost = 0.0
    self.gamma = 0.99
    self.name = name
    # A fresh default Board per instance avoids sharing one mutable default.
    if init_board is None:
        init_board = Board(False, 0)
    self.current_state = TFEState(init_board.addTile().addTile().board)

    # Initializing the actual Markov Decision Process (reusing the start
    # state built above rather than placing two more random tiles):
    MDP.__init__(self, tfeMDP.ACTIONS, self._transition_func, self._reward_func,
                 init_state=self.current_state, gamma=self.gamma)
def __init__(self, pomdp):
    '''
    Convert the given POMDP to a belief-state MDP.

    Args:
        pomdp (POMDP)
    '''
    self.state_transition_func = pomdp.transition_func
    self.state_reward_func = pomdp.reward_func
    self.state_observation_func = pomdp.observation_func
    self.belief_updater_func = pomdp.belief_updater_func
    self.pomdp = pomdp

    MDP.__init__(self, pomdp.actions, self._belief_transition_function, self._belief_reward_function,
                 BeliefState(pomdp.init_belief), pomdp.gamma, pomdp.step_cost)
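# Usage sketch (assumes this class is the simple_rl-style BeliefMDP and that
# a fully specified POMDP instance is in scope):
#
#     belief_mdp = BeliefMDP(pomdp)
#     # Planning then runs over BeliefState objects instead of raw states.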