def __init__(self, p, rew, mu=None, gamma=.9, horizon=np.inf):
    """
    Constructor.

    Args:
        p (np.ndarray): transition probability matrix;
        rew (np.ndarray): reward matrix;
        mu (np.ndarray, None): initial state probability distribution;
        gamma (float, .9): discount factor;
        horizon (int, np.inf): the horizon.

    """
    assert p.shape == rew.shape
    assert mu is None or p.shape[0] == mu.size

    # MDP parameters
    self.p = p
    self.r = rew
    self.mu = mu

    # MDP properties
    observation_space = spaces.Discrete(p.shape[0])
    action_space = spaces.Discrete(p.shape[1])
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
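# --- Usage sketch (illustrative, not part of the class) ----------------------
# A hedged example of how this constructor could be called, assuming it belongs
# to MushroomRL's FiniteMDP. p[s, a, s'] is the probability of reaching s' when
# taking action a in state s; rew has the same shape and mu is the initial
# state distribution over the p.shape[0] states.
import numpy as np
from mushroom_rl.environments import FiniteMDP

p = np.array([[[.9, .1], [.1, .9]],
              [[.2, .8], [.8, .2]]])   # shape: (2 states, 2 actions, 2 states)
rew = np.zeros_like(p)
rew[:, :, 1] = 1.                      # reward 1 for landing in state 1
mu = np.array([1., 0.])                # always start in state 0
mdp = FiniteMDP(p, rew, mu=mu, gamma=.9, horizon=100)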
def __init__(self, size=5., goal=[2.5, 2.5], goal_radius=0.6):
    # Save important environment information
    self._size = size
    self._goal = np.array(goal)
    self._goal_radius = goal_radius

    # Create the action space: 4 discrete actions (N, S, W, E)
    action_space = Discrete(4)

    # Create the observation space. It is a 2D box of dimension (size x size).
    # Low and high arrays can also be specified, if every component has
    # different limits.
    shape = (2,)
    observation_space = Box(0, size, shape)

    # Create the MDPInfo structure, needed by the environment interface
    mdp_info = MDPInfo(observation_space, action_space, gamma=0.99, horizon=100)

    super().__init__(mdp_info)

    # Create a state class variable to store the current state
    self._state = None

    # Create the viewer
    self._viewer = Viewer(size, size)
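# --- Usage sketch (illustrative) ----------------------------------------------
# A hedged example of instantiating this environment. The class name `GoalRoom`
# is a placeholder, since the snippet above does not show it; the checks below
# only restate the spaces defined in the constructor.
env = GoalRoom(size=5., goal=[2.5, 2.5], goal_radius=0.6)
assert env.info.action_space.n == 4            # N, S, W, E
assert env.info.observation_space.shape == (2,)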
def __init__(self, horizon=100, gamma=.95):
    """
    Constructor.

    Args:
        horizon (int, 100): horizon of the problem;
        gamma (float, .95): discount factor.

    """
    # MDP parameters
    self.max_pos = 1.
    self.max_velocity = 3.
    high = np.array([self.max_pos, self.max_velocity])
    self._g = 9.81
    self._m = 1.
    self._dt = .1
    self._discrete_actions = [-4., 4.]

    # MDP properties
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Discrete(2)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(1, 1)

    super().__init__(mdp_info)
def __init__(self, **kwargs):
    # Define environment properties
    high_x = np.array([5.0, 5.0, np.pi])
    low_x = -high_x
    high_u = np.array([1.0, 3.0])
    low_u = -high_u

    observation_space = spaces.Box(low=low_x, high=high_x)
    action_space = spaces.Box(low=low_u, high=high_u)
    gamma = 0.9
    horizon = 400
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    hz = 10.0
    super(TurtlebotGazebo, self).__init__('turtlebot_gazebo', mdp_info, hz,
                                          **kwargs)

    # Publish the velocity setpoint on the /cmd_vel topic
    self._pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)

    # Use the /gazebo/get_model_state service to get the position of the
    # turtlebot
    model_state_service_name = '/gazebo/get_model_state'
    rospy.wait_for_service(model_state_service_name)
    self._model_state_service = rospy.ServiceProxy(model_state_service_name,
                                                   GetModelState)
def __init__(self, start=None, goal=None, goal_threshold=.1, noise_step=.025,
             noise_reward=0, reward_goal=0., thrust=.05, puddle_center=None,
             puddle_width=None, gamma=.99, horizon=5000):
    """
    Constructor.

    Args:
        start (np.array, None): starting position of the agent;
        goal (np.array, None): goal position;
        goal_threshold (float, .1): distance threshold of the agent from the
            goal to consider it reached;
        noise_step (float, .025): noise in actions;
        noise_reward (float, 0): standard deviation of gaussian noise in reward;
        reward_goal (float, 0): reward obtained reaching goal state;
        thrust (float, .05): distance walked during each action;
        puddle_center (np.array, None): center of the puddle;
        puddle_width (np.array, None): width of the puddle;
        gamma (float, .99): discount factor;
        horizon (int, 5000): horizon of the problem.

    """
    # MDP parameters
    self._start = np.array([.2, .4]) if start is None else start
    self._goal = np.array([1., 1.]) if goal is None else goal
    self._goal_threshold = goal_threshold
    self._noise_step = noise_step
    self._noise_reward = noise_reward
    self._reward_goal = reward_goal
    self._thrust = thrust
    puddle_center = [[.3, .6], [.4, .5], [.8, .9]] if puddle_center is None \
        else puddle_center
    self._puddle_center = [np.array(center) for center in puddle_center]
    puddle_width = [[.1, .03], [.03, .1], [.03, .1]] if puddle_width is None \
        else puddle_width
    self._puddle_width = [np.array(width) for width in puddle_width]

    self._actions = [np.zeros(2) for _ in range(5)]
    for i in range(4):
        self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1)

    # MDP properties
    action_space = Discrete(5)
    observation_space = Box(0., 1., shape=(2,))
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._pixels = None
    self._viewer = Viewer(1.0, 1.0)

    super().__init__(mdp_info)
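# --- Usage sketch (illustrative) ----------------------------------------------
# A hedged example of the puddle layout format accepted above, assuming this is
# MushroomRL's PuddleWorld. Each puddle is described by a 2D center and a 2D
# width; passing None keeps the defaults defined in the constructor.
import numpy as np
from mushroom_rl.environments import PuddleWorld

env = PuddleWorld(start=np.array([.2, .4]),
                  goal=np.array([1., 1.]),
                  puddle_center=[[.3, .6], [.4, .5]],
                  puddle_width=[[.1, .03], [.03, .1]],
                  gamma=.99, horizon=5000)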
def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)):
    # MDP properties
    observation_space = spaces.Discrete(height * width)
    action_space = spaces.Discrete(4)
    horizon = np.inf
    gamma = .95
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info, height, width, start, goal)
def _modify_mdp_info(self, mdp_info):
    # Keep only a subset of the observation components and tighten the bounds
    # of the first three entries
    obs_idx = [0, 1, 2, 7, 8, 9, 13, 14, 15, 16, 17, 18]
    obs_low = mdp_info.observation_space.low[obs_idx]
    obs_high = mdp_info.observation_space.high[obs_idx]
    obs_low[0:3] = [-1, -0.5, -np.pi]
    obs_high[0:3] = [1, 0.5, np.pi]
    observation_space = Box(low=obs_low, high=obs_high)

    return MDPInfo(observation_space, mdp_info.action_space, mdp_info.gamma,
                   mdp_info.horizon)
def __init__(self, name, width=84, height=84, ends_at_life=False,
             max_pooling=True, history_length=4, max_no_op_actions=30):
    """
    Constructor.

    Args:
        name (str): id name of the Atari game in Gym;
        width (int, 84): width of the screen;
        height (int, 84): height of the screen;
        ends_at_life (bool, False): whether the episode ends when a life is
            lost or not;
        max_pooling (bool, True): whether to do max-pooling or
            average-pooling of the last two frames when using NoFrameskip;
        history_length (int, 4): number of frames to form a state;
        max_no_op_actions (int, 30): maximum number of no-op actions to
            execute at the beginning of an episode.

    """
    # MDP creation
    if 'NoFrameskip' in name:
        self.env = MaxAndSkip(gym.make(name), history_length, max_pooling)
    else:
        self.env = gym.make(name)

    # MDP parameters
    self._img_size = (width, height)
    self._episode_ends_at_life = ends_at_life
    self._max_lives = self.env.unwrapped.ale.lives()
    self._lives = self._max_lives
    self._force_fire = None
    self._real_reset = True
    self._max_no_op_actions = max_no_op_actions
    self._history_length = history_length
    self._current_no_op = None

    assert self.env.unwrapped.get_action_meanings()[0] == 'NOOP'

    # MDP properties
    action_space = Discrete(self.env.action_space.n)
    observation_space = Box(
        low=0., high=255.,
        shape=(history_length, self._img_size[1], self._img_size[0]))
    horizon = np.inf  # the gym time limit is used.
    gamma = .99
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
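# --- Usage sketch (illustrative) ----------------------------------------------
# A hedged example, assuming this is MushroomRL's Atari wrapper. A 'NoFrameskip'
# Gym id triggers the MaxAndSkip branch above; ends_at_life=True makes losing a
# life terminate the episode.
from mushroom_rl.environments import Atari

env = Atari('BreakoutNoFrameskip-v4', width=84, height=84,
            ends_at_life=True, history_length=4, max_no_op_actions=30)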
def __init__(self, A, B, Q, R, max_pos=np.inf, max_action=np.inf,
             random_init=False, episodic=False, gamma=0.9, horizon=50,
             initial_state=None):
    """
    Constructor.

    Args:
        A (np.ndarray): the state dynamics matrix;
        B (np.ndarray): the action dynamics matrix;
        Q (np.ndarray): reward weight matrix for state;
        R (np.ndarray): reward weight matrix for action;
        max_pos (float, np.inf): maximum value of the state;
        max_action (float, np.inf): maximum value of the action;
        random_init (bool, False): start from a random state;
        episodic (bool, False): end the episode when the state goes over
            the threshold;
        gamma (float, 0.9): discount factor;
        horizon (int, 50): horizon of the mdp;
        initial_state (np.ndarray, None): initial state of the system.

    """
    self.A = A
    self.B = B
    self.Q = Q
    self.R = R
    self._max_pos = max_pos
    self._max_action = max_action
    self._episodic = episodic
    self.random_init = random_init
    self._initial_state = initial_state

    # MDP properties
    high_x = self._max_pos * np.ones(A.shape[0])
    low_x = -high_x

    high_u = self._max_action * np.ones(B.shape[1])
    low_u = -high_u

    observation_space = spaces.Box(low=low_x, high=high_x)
    action_space = spaces.Box(low=low_u, high=high_u)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
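# --- Usage sketch (illustrative) ----------------------------------------------
# A hedged example of a 2-dimensional LQR problem, assuming this is MushroomRL's
# LQR environment. A and B define the linear dynamics, while Q and R weight the
# quadratic state and action terms of the reward.
import numpy as np
from mushroom_rl.environments import LQR

A = np.eye(2)
B = np.eye(2)
Q = np.eye(2)
R = .1 * np.eye(2)
env = LQR(A, B, Q, R, max_pos=10., max_action=5., gamma=.9, horizon=50)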
def __init__(self, name, horizon=None, gamma=0.99, history_length=4,
             fixed_seed=None, use_pixels=False):
    """
    Constructor.

    Args:
        name (str): name of the environment;
        horizon (int, None): the horizon;
        gamma (float, 0.99): the discount factor;
        history_length (int, 4): number of frames to form a state;
        fixed_seed (int, None): if passed, it fixes the seed of the
            environment at every reset. This way, the environment is fixed
            rather than procedurally generated;
        use_pixels (bool, False): if True, MiniGrid's default 7x7x3
            observation is converted to an image of resolution 56x56x3.

    """
    # MDP creation
    self._not_pybullet = True
    self._first = True

    env = gym.make(name)
    obs_high = 10.
    if use_pixels:
        env = RGBImgPartialObsWrapper(env)  # Get pixel observations
        obs_high = 255.
    env = ImgObsWrapper(env)  # Get rid of the 'mission' field
    self.env = env

    self._fixed_seed = fixed_seed

    self._img_size = env.observation_space.shape[0:2]
    self._history_length = history_length

    # Get the default horizon
    if horizon is None:
        horizon = self.env.max_steps

    # MDP properties
    action_space = Discrete(self.env.action_space.n)
    observation_space = Box(
        low=0., high=obs_high,
        shape=(history_length, self._img_size[1], self._img_size[0]))
    # Hack to ignore the gym time limit (do not use np.inf, since MiniGrid
    # returns r(t) = 1 - 0.9 * t / T)
    self.env.max_steps = horizon + 1
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)
    Environment.__init__(self, mdp_info)

    self._state = None
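# --- Usage sketch (illustrative) ----------------------------------------------
# A hedged example, assuming this is MushroomRL's MiniGrid wrapper (the import
# path is an assumption and requires gym-minigrid to be installed). With
# use_pixels=False, the default 7x7x3 grid observation is used and stacked over
# history_length frames.
from mushroom_rl.environments import MiniGrid

env = MiniGrid('MiniGrid-Empty-8x8-v0', gamma=0.99, history_length=4,
               fixed_seed=0, use_pixels=False)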
def __init__(self, m=2., M=8., l=.5, g=9.8, mu=1e-2, max_u=50., noise_u=10.,
             horizon=3000, gamma=.95):
    """
    Constructor.

    Args:
        m (float, 2.0): mass of the pendulum;
        M (float, 8.0): mass of the cart;
        l (float, .5): length of the pendulum;
        g (float, 9.8): gravity acceleration constant;
        mu (float, 1e-2): friction constant;
        max_u (float, 50.): maximum allowed input torque;
        noise_u (float, 10.): maximum noise on the action;
        horizon (int, 3000): horizon of the problem;
        gamma (float, .95): discount factor.

    """
    # MDP parameters
    self._m = m
    self._M = M
    self._l = l
    self._g = g
    self._alpha = 1 / (self._m + self._M)
    self._mu = mu
    self._dt = .1
    self._max_u = max_u
    self._noise_u = noise_u
    high = np.array([np.inf, np.inf])

    # MDP properties
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Discrete(3)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(2.5 * l, 2.5 * l)
    self._last_u = None
    self._state = None

    super().__init__(mdp_info)
def __init__(self, small=True, n_steps_action=3):
    """
    Constructor.

    Args:
        small (bool, True): whether to use a small state space or not;
        n_steps_action (int, 3): number of integration intervals for each
            step of the mdp.

    """
    # MDP parameters
    self.field_size = 150 if small else 1000
    low = np.array([0, 0, -np.pi, -np.pi / 12.])
    high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
    self.omega_max = np.array([np.pi / 12.])
    self._v = 3.
    self._T = 5.
    self._dt = .2
    self._gate_s = np.empty(2)
    self._gate_e = np.empty(2)
    self._gate_s[0] = 100 if small else 350
    self._gate_s[1] = 120 if small else 400
    self._gate_e[0] = 120 if small else 450
    self._gate_e[1] = 100 if small else 400
    self._out_reward = -100
    self._success_reward = 0
    self._small = small
    self._state = None
    self.n_steps_action = n_steps_action

    # MDP properties
    observation_space = spaces.Box(low=low, high=high)
    action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
    horizon = 5000
    gamma = .99
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(self.field_size, self.field_size,
                          background=(66, 131, 237))

    super().__init__(mdp_info)
def __init__(self, **kwargs):
    # Define environment properties
    high_x = np.array([11.088889122009277, 11.088889122009277, np.pi])
    low_x = np.array([0, 0, -np.pi])
    high_u = np.array([2.0, 2.0])
    low_u = np.array([0., -2.0])

    observation_space = spaces.Box(low=low_x, high=high_x)
    action_space = spaces.Box(low=low_u, high=high_u)
    gamma = 0.9
    horizon = 100
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    hz = 10.0
    self._target = np.array([5.0, 5.0])
    self._current_pose = np.zeros(3)

    super().__init__('turtlesim_env', mdp_info, hz, **kwargs)

    # Publish the velocity setpoint on the /turtle1/cmd_vel topic
    self._cmd = rospy.Publisher('/turtle1/cmd_vel', Twist, queue_size=1)

    # Subscribe to /turtle1/pose to get the turtle pose
    self._pose = rospy.Subscriber('/turtle1/pose', Pose, self._pose_callback)

    # Connect to the teleport service to reset the environment
    self._reset_service_name = '/turtle1/teleport_absolute'
    rospy.wait_for_service(self._reset_service_name)
    self._reset_service = rospy.ServiceProxy(self._reset_service_name,
                                             TeleportAbsolute)

    # Connect to the clear service to clean up the traces
    self._clear_service_name = '/clear'
    rospy.wait_for_service(self._clear_service_name)
    self._clear_service = rospy.ServiceProxy(self._clear_service_name, Empty)
def __init__(self, random_start=False):
    """
    Constructor.

    Args:
        random_start (bool, False): whether to start from a random position
            or from the horizontal one.

    """
    # MDP parameters
    gamma = 0.97

    self._Mr = 0.3 * 2
    self._Mp = 2.55
    self._Ip = 2.6e-2
    self._Ir = 4.54e-4 * 2
    self._l = 13.8e-2
    self._r = 5.5e-2
    self._dt = 1e-2
    self._g = 9.81
    self._max_u = 5

    self._random = random_start

    high = np.array([-np.pi / 2, 15, 75])

    # MDP properties
    observation_space = spaces.Box(low=-high, high=high)
    action_space = spaces.Box(low=np.array([-self._max_u]),
                              high=np.array([self._max_u]))
    horizon = 300
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Visualization
    self._viewer = Viewer(5 * self._l, 5 * self._l)
    self._last_x = 0

    super().__init__(mdp_info)
def __init__(self, config_file, horizon=None, gamma=0.99, is_discrete=False,
             width=None, height=None, debug_gui=False, verbose=False):
    """
    Constructor.

    Args:
        config_file (str): path to the YAML file specifying the task
            (see igibson/examples/configs/ and igibson/test/);
        horizon (int, None): the horizon;
        gamma (float, 0.99): the discount factor;
        is_discrete (bool, False): if True, actions are automatically
            discretized by iGibson's `set_up_discrete_action_space`.
            Please note that not all robots support discrete actions;
        width (int, None): width of the pixel observation. If None, the
            value specified in the config file is used;
        height (int, None): height of the pixel observation. If None, the
            value specified in the config file is used;
        debug_gui (bool, False): if True, activates iGibson in GUI mode,
            showing the pybullet rendering and the robot camera;
        verbose (bool, False): if False, disables iGibson's default log
            messages.

    """
    if not verbose:
        logging.disable(logging.CRITICAL + 1)  # Disable iGibson log messages

    # MDP creation
    self._not_pybullet = False
    self._first = True

    config = parse_config(config_file)
    config['is_discrete'] = is_discrete

    if horizon is not None:
        config['max_step'] = horizon
    else:
        horizon = config['max_step']
        config['max_step'] = horizon + 1  # Hack to ignore gym time limit

    if width is not None:
        config['image_width'] = width
    if height is not None:
        config['image_height'] = height

    env = iGibsonEnv(config_file=config,
                     mode='gui' if debug_gui else 'headless')
    env = iGibsonWrapper(env)
    self.env = env

    self._img_size = env.observation_space.shape[0:2]

    # MDP properties
    action_space = self.env.action_space
    observation_space = Box(
        low=0., high=255., shape=(3, self._img_size[1], self._img_size[0]))
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    if isinstance(action_space, Discrete):
        self._convert_action = lambda a: a[0]
    else:
        self._convert_action = lambda a: a

    self._viewer = ImageViewer((self._img_size[1], self._img_size[0]), 1 / 60)
    self._image = None

    Environment.__init__(self, mdp_info)
def __init__(self, file_name, actuation_spec, observation_spec, gamma,
             horizon, n_substeps=1, n_intermediate_steps=1,
             additional_data_spec=None, collision_groups=None):
    """
    Constructor.

    Args:
        file_name (string): The path to the XML file with which the
            environment should be created;
        actuation_spec (list): A list specifying the names of the joints
            which should be controllable by the agent. Can be left empty
            when all actuators should be used;
        observation_spec (list): A list containing the names of data that
            should be made available to the agent as an observation and
            their type (ObservationType). An entry in the list is given by:
            (name, type);
        gamma (float): The discounting factor of the environment;
        horizon (int): The maximum horizon for the environment;
        n_substeps (int): The number of substeps to use by the MuJoCo
            simulator. An action given by the agent will be applied for
            n_substeps before the agent receives the next observation and
            can act accordingly;
        n_intermediate_steps (int): The number of steps between every action
            taken by the agent. Similar to n_substeps but allows the user
            to modify, control and access intermediate states;
        additional_data_spec (list): A list containing the data fields of
            interest, which should be read from or written to during
            simulation. The entries are given as the following tuples:
            (key, name, type), where key is a string for later referencing
            in the "read_data" and "write_data" methods, name is the name
            of the object in the XML specification and type is the
            ObservationType;
        collision_groups (list): A list containing groups of geoms for
            which collisions should be checked during simulation via
            ``check_collision``. The entries are given as:
            ``(key, geom_names)``, where key is a string for later
            referencing in the "check_collision" method, and geom_names is
            a list of geom names in the XML specification.

    """
    # Create the simulation
    self._sim = self._load_simulation(file_name, n_substeps)

    self._n_intermediate_steps = n_intermediate_steps
    self._viewer = None
    self._state = None

    # Read the actuation spec and build the mapping between actions and ids
    # as well as their limits
    if len(actuation_spec) == 0:
        self._action_indices = [
            i for i in range(0, len(self._sim.model._actuator_name2id))
        ]
    else:
        self._action_indices = []
        for name in actuation_spec:
            self._action_indices.append(
                self._sim.model._actuator_name2id[name])

    low = []
    high = []
    for index in self._action_indices:
        if self._sim.model.actuator_ctrllimited[index]:
            low.append(self._sim.model.actuator_ctrlrange[index][0])
            high.append(self._sim.model.actuator_ctrlrange[index][1])
        else:
            low.append(-np.inf)
            high.append(np.inf)
    action_space = Box(np.array(low), np.array(high))

    # Read the observation spec to build a mapping at every step. It is
    # ensured that the values appear in the order they are specified.
    if len(observation_spec) == 0:
        raise AttributeError(
            "No Environment observations were specified. "
            "Add at least one observation to the observation_spec.")
    else:
        self._observation_map = observation_spec

    # We can only specify limits for the joint positions, all other
    # information can be potentially unbounded
    low = []
    high = []
    for name, ot in self._observation_map:
        obs_count = len(self._get_observation(name, ot))
        if obs_count == 1 and ot == ObservationType.JOINT_POS:
            joint_range = self._sim.model.jnt_range[
                self._sim.model._joint_name2id[name]]
            if not (joint_range[0] == joint_range[1] == 0.0):
                low.append(joint_range[0])
                high.append(joint_range[1])
            else:
                low.append(-np.inf)
                high.append(np.inf)
        else:
            low.extend([-np.inf] * obs_count)
            high.extend([np.inf] * obs_count)
    observation_space = Box(np.array(low), np.array(high))

    # Pre-process the additional data to allow easier writing and reading
    # to and from arrays in MuJoCo
    self.additional_data = {}
    if additional_data_spec is not None:
        for key, name, ot in additional_data_spec:
            self.additional_data[key] = (name, ot)

    # Pre-process the collision groups for "fast" detection of contacts
    self.collision_groups = {}
    if collision_groups is not None:
        for name, geom_names in collision_groups:
            self.collision_groups[name] = {
                self._sim.model._geom_name2id[geom_name]
                for geom_name in geom_names
            }

    # Finally, we create the MDP information and call the constructor of
    # the parent class
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    super().__init__(mdp_info)
def __init__(self, domain_name, task_name, horizon=None, gamma=0.99,
             task_kwargs=None, dt=.01, width_screen=480, height_screen=480,
             camera_id=0, use_pixels=False, pixels_width=64,
             pixels_height=64):
    """
    Constructor.

    Args:
        domain_name (str): name of the environment;
        task_name (str): name of the task of the environment;
        horizon (int): the horizon;
        gamma (float): the discount factor;
        task_kwargs (dict, None): parameters of the task;
        dt (float, .01): duration of a control step;
        width_screen (int, 480): width of the screen;
        height_screen (int, 480): height of the screen;
        camera_id (int, 0): position of camera to render the environment;
        use_pixels (bool, False): if True, pixel observations are used
            rather than the state vector;
        pixels_width (int, 64): width of the pixel observation;
        pixels_height (int, 64): height of the pixel observation.

    """
    # MDP creation
    self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)
    if use_pixels:
        self.env = pixels.Wrapper(self.env,
                                  render_kwargs={'width': pixels_width,
                                                 'height': pixels_height})

    # Get the default horizon
    if horizon is None:
        horizon = self.env._step_limit

    # Hack to ignore the dm_control time limit
    self.env._step_limit = np.inf

    if use_pixels:
        self._convert_observation_space = \
            self._convert_observation_space_pixels
        self._convert_observation = self._convert_observation_pixels
    else:
        self._convert_observation_space = \
            self._convert_observation_space_vector
        self._convert_observation = self._convert_observation_vector

    # MDP properties
    action_space = self._convert_action_space(self.env.action_spec())
    observation_space = self._convert_observation_space(
        self.env.observation_spec())
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    self._viewer = ImageViewer((width_screen, height_screen), dt)
    self._camera_id = camera_id

    super().__init__(mdp_info)

    self._state = None
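# --- Usage sketch (illustrative) ----------------------------------------------
# A hedged example, assuming this is MushroomRL's DMControl wrapper around the
# dm_control suite; 'cartpole'/'swingup' is one of the standard suite tasks.
from mushroom_rl.environments import DMControl

env = DMControl('cartpole', 'swingup', gamma=0.99, use_pixels=False)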
def __init__(self, wrapper, config_file, base_config_file=None, horizon=None,
             gamma=0.99, width=None, height=None):
    """
    Constructor. For more details on how to pass YAML configuration files,
    please see <MUSHROOM_RL PATH>/examples/habitat/README.md.

    Args:
        wrapper (str): wrapper for converting observations and actions
            (e.g., HabitatRearrangeWrapper);
        config_file (str): path to the YAML file specifying the RL task
            configuration (see <HABITAT_LAB PATH>/habitat_baselines/configs/);
        base_config_file (str, None): path to an optional YAML file, used as
            'BASE_TASK_CONFIG_PATH' in the first YAML
            (see <HABITAT_LAB PATH>/configs/);
        horizon (int, None): the horizon;
        gamma (float, 0.99): the discount factor;
        width (int, None): width of the pixel observation. If None, the
            value specified in the config file is used;
        height (int, None): height of the pixel observation. If None, the
            value specified in the config file is used.

    """
    # MDP creation
    self._not_pybullet = False
    self._first = True

    if base_config_file is None:
        base_config_file = config_file

    config = get_config(config_paths=config_file,
                        opts=['BASE_TASK_CONFIG_PATH', base_config_file])

    config.defrost()

    # Get the default horizon
    if horizon is None:
        horizon = config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS
    # Hack to ignore gym time limit
    config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = horizon + 1

    # Overwrite all RGB width / height used for the TASK (not SIMULATOR)
    for k in config['TASK_CONFIG']['SIMULATOR']:
        if 'rgb' in k.lower():
            if height is not None:
                config['TASK_CONFIG']['SIMULATOR'][k]['HEIGHT'] = height
            if width is not None:
                config['TASK_CONFIG']['SIMULATOR'][k]['WIDTH'] = width

    config.freeze()

    env_class = get_env_class(config.ENV_NAME)
    env = make_env_fn(env_class=env_class, config=config)
    env = globals()[wrapper](env)
    self.env = env

    self._img_size = env.observation_space.shape[0:2]

    # MDP properties
    action_space = self.env.action_space
    observation_space = Box(
        low=0., high=255., shape=(3, self._img_size[1], self._img_size[0]))
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    if isinstance(action_space, Discrete):
        self._convert_action = lambda a: a[0]
    else:
        self._convert_action = lambda a: a

    self._viewer = ImageViewer((self._img_size[1], self._img_size[0]), 1 / 10)

    Environment.__init__(self, mdp_info)
def __init__(self, name, horizon=None, gamma=0.99, wrappers=None,
             wrappers_args=None, **env_args):
    """
    Constructor.

    Args:
        name (str): gym id of the environment;
        horizon (int): the horizon. If None, use the one from Gym;
        gamma (float, 0.99): the discount factor;
        wrappers (list, None): list of wrappers to apply over the
            environment. It is possible to pass arguments to the wrappers
            by providing a tuple with two elements: the gym wrapper class
            and a dictionary containing the parameters needed by the
            wrapper constructor;
        wrappers_args (list, None): list of lists of arguments for each
            wrapper;
        **env_args: other gym environment parameters.

    """
    # MDP creation
    self._not_pybullet = True
    self._first = True

    if pybullet_found and '- ' + name in pybullet_envs.getList():
        import pybullet
        pybullet.connect(pybullet.DIRECT)
        self._not_pybullet = False

    self.env = gym.make(name, **env_args)

    self._render_dt = self.env.unwrapped.dt if hasattr(
        self.env.unwrapped, "dt") else 0.0

    if wrappers is not None:
        if wrappers_args is None:
            wrappers_args = [dict()] * len(wrappers)
        for wrapper, args in zip(wrappers, wrappers_args):
            if isinstance(wrapper, tuple):
                self.env = wrapper[0](self.env, *args, **wrapper[1])
            else:
                self.env = wrapper(self.env, *args, **env_args)

    horizon = self._set_horizon(self.env, horizon)

    # MDP properties
    assert not isinstance(self.env.observation_space,
                          gym_spaces.MultiDiscrete)
    assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete)

    action_space = self._convert_gym_space(self.env.action_space)
    observation_space = self._convert_gym_space(self.env.observation_space)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    if isinstance(action_space, Discrete):
        self._convert_action = lambda a: a[0]
    else:
        self._convert_action = lambda a: a

    super().__init__(mdp_info)
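# --- Usage sketch (illustrative) ----------------------------------------------
# A hedged example of the wrapper mechanism documented above, assuming this is
# MushroomRL's Gym environment. A wrapper given as a (class, dict) tuple
# receives the dictionary entries as keyword arguments. The Gym id depends on
# the installed Gym version (e.g., 'Pendulum-v0' on older releases).
import gym
from mushroom_rl.environments import Gym

env = Gym('Pendulum-v1', horizon=200, gamma=0.99,
          wrappers=[(gym.wrappers.TimeLimit, dict(max_episode_steps=200))])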
def __init__(self, files, actuation_spec, observation_spec, gamma, horizon,
             timestep=1 / 240, n_intermediate_steps=1,
             enforce_joint_velocity_limits=False, debug_gui=False,
             **viewer_params):
    """
    Constructor.

    Args:
        files (dict): dictionary of the URDF/MJCF/SDF files to load (key)
            and parameters dictionary (value);
        actuation_spec (list): A list of tuples specifying the names of the
            joints which should be controllable by the agent and their
            control mode. Can be left empty when all actuators should be
            used in position control;
        observation_spec (list): A list containing the names of data that
            should be made available to the agent as an observation and
            their type (ObservationType). An entry in the list is given by:
            (name, type);
        gamma (float): The discounting factor of the environment;
        horizon (int): The maximum horizon for the environment;
        timestep (float, 1/240): The timestep used by the PyBullet
            simulator;
        n_intermediate_steps (int): The number of steps between every action
            taken by the agent. Allows the user to modify, control and
            access intermediate states;
        enforce_joint_velocity_limits (bool, False): flag to enforce the
            velocity limits;
        debug_gui (bool, False): flag to activate the default pybullet
            visualizer, which can be used for debugging purposes;
        **viewer_params: other parameters to be passed to the viewer.
            See PyBulletViewer documentation for the available options.

    """
    assert len(observation_spec) > 0
    assert len(actuation_spec) > 0

    # Store simulation parameters
    self._timestep = timestep
    self._n_intermediate_steps = n_intermediate_steps

    # Create the simulation and viewer
    if debug_gui:
        self._client = BulletClient(connection_mode=pybullet.GUI)
        self._client.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
    else:
        self._client = BulletClient(connection_mode=pybullet.DIRECT)
    self._client.setTimeStep(self._timestep)
    self._client.setGravity(0, 0, -9.81)
    self._client.setAdditionalSearchPath(pybullet_data.getDataPath())

    self._viewer = PyBulletViewer(self._client, self.dt, **viewer_params)
    self._state = None

    # Load model and create access maps
    self._model_map = dict()
    self._load_all_models(files, enforce_joint_velocity_limits)

    # Build utils
    self._indexer = IndexMap(self._client, self._model_map, actuation_spec,
                             observation_spec)
    self.joints = JointsHelper(self._client, self._indexer, observation_spec)

    # Finally, we create the MDP information and call the constructor of the
    # parent class
    action_space = Box(*self._indexer.action_limits)
    observation_space = Box(*self._indexer.observation_limits)
    mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

    # Let the child class modify the mdp_info data structure
    mdp_info = self._modify_mdp_info(mdp_info)

    # Provide the structure to the superclass
    super().__init__(mdp_info)

    # Save the initial state of the MDP
    self._initial_state = self._client.saveState()
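# --- Usage sketch (illustrative) ----------------------------------------------
# A hedged example of the spec formats described in the docstring above. The
# URDF path, joint names, loading parameters and the observation type enum are
# placeholders/assumptions: the docstring only states that actuation entries
# are (joint_name, control_mode) tuples and observation entries are
# (name, type) tuples.
import pybullet

files = {'models/my_robot.urdf': dict(basePosition=[0., 0., 0.],
                                      useFixedBase=True)}
actuation_spec = [('joint_1', pybullet.POSITION_CONTROL),
                  ('joint_2', pybullet.POSITION_CONTROL)]
observation_spec = [('joint_1', ObservationType.JOINT_POS),
                    ('joint_2', ObservationType.JOINT_POS)]
# A subclass would then pass these to the constructor above, e.g.:
# super().__init__(files, actuation_spec, observation_spec,
#                  gamma=0.99, horizon=1000, debug_gui=False)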