def __init__(self, gamma=0.995, horizon=1500, debug_gui=False, kine_type='analytical'): self.simulation = AirHockeyBase(gamma, horizon, debug_gui) self.kin_type = kine_type # [X, Y, Pitch, Yaw, Psi] low = np.array([0.6, -0.5]) #, np.deg2rad(-180), np.deg2rad(-10), 0]) high = np.array([1.1, 0.5]) #, np.deg2rad(180), np.deg2rad(75), 0]) action_space = Box(low, high) observation_space = self.simulation._mdp_info.observation_space mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info) self.table_surface_height = 0.162 self.iiwa_global_config = [1, -1, 1] self.fixed_parameters = [0., np.deg2rad(-35), 1.7] # self.fixed_parameters = [0., np.deg2rad(-35), 0] self.get_initial_state() if self.kin_type == 'analytical': self.kinematics = Kinematics() elif self.kin_type == 'pykdl' and KDLInterface is not None: self.kinematics = KDLInterface(self.simulation.robot_path, 7, 'front_base', 'F_link_ee', np.array(IIWA_JOINT_MIN_LIMITS), np.array(IIWA_JOINT_MAX_LIMITS), np.array([0., 0., -9.81])) else: raise NotImplementedError
def __init__(self, size=5., goal=[2.5, 2.5], goal_radius=0.6): # Save important environment information self._size = size self._goal = np.array(goal) self._goal_radius = goal_radius # Create the action space. action_space = Discrete(4) # 4 actions: N, S, W, E # Create the observation space. It's a 2D box of dimension (size x size). # You can also specify low and high array, if every component has different limits shape = (2,) observation_space = Box(0, size, shape) # Create the MDPInfo structure, needed by the environment interface mdp_info = MDPInfo(observation_space, action_space, gamma=0.99, horizon=100) super().__init__(mdp_info) # Create a state class variable to store the current state self._state = None # Create the viewer self._viewer = Viewer(size, size)
def __init__(self, start=None, goal=None, goal_threshold=.1, noise_step=.025, noise_reward=0, reward_goal=0., thrust=.05, puddle_center=None, puddle_width=None, gamma=.99, horizon=5000): """ Constructor. Args: start (np.array, None): starting position of the agent; goal (np.array, None): goal position; goal_threshold (float, .1): distance threshold of the agent from the goal to consider it reached; noise_step (float, .025): noise in actions; noise_reward (float, 0): standard deviation of gaussian noise in reward; reward_goal (float, 0): reward obtained reaching goal state; thrust (float, .05): distance walked during each action; puddle_center (np.array, None): center of the puddle; puddle_width (np.array, None): width of the puddle; gamma (float, .99): discount factor. horizon (int, 5000): horizon of the problem; """ # MDP parameters self._start = np.array([.2, .4]) if start is None else start self._goal = np.array([1., 1.]) if goal is None else goal self._goal_threshold = goal_threshold self._noise_step = noise_step self._noise_reward = noise_reward self._reward_goal = reward_goal self._thrust = thrust puddle_center = [[.3, .6], [.4, .5], [.8, .9] ] if puddle_center is None else puddle_center self._puddle_center = [np.array(center) for center in puddle_center] puddle_width = [[.1, .03], [.03, .1], [.03, .1] ] if puddle_width is None else puddle_width self._puddle_width = [np.array(width) for width in puddle_width] self._actions = [np.zeros(2) for _ in range(5)] for i in range(4): self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1) # MDP properties action_space = Discrete(5) observation_space = Box(0., 1., shape=(2, )) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Visualization self._pixels = None self._viewer = Viewer(1.0, 1.0) super().__init__(mdp_info)
def _modify_mdp_info(self, mdp_info): obs_idx = [0, 1, 2, 7, 8, 9, 13, 14, 15, 16, 17, 18] obs_low = mdp_info.observation_space.low[obs_idx] obs_high = mdp_info.observation_space.high[obs_idx] obs_low[0:3] = [-1, -0.5, -np.pi] obs_high[0:3] = [1, 0.5, np.pi] observation_space = Box(low=obs_low, high=obs_high) return MDPInfo(observation_space, mdp_info.action_space, mdp_info.gamma, mdp_info.horizon)
def __init__(self, env): gym.Wrapper.__init__(self, env) self.arm_ac_size = env.action_space['ARM_ACTION']['arm_action'].shape[ 0] self.grip_ac_size = env.action_space['ARM_ACTION'][ 'grip_action'].shape[0] self.n_actions = self.arm_ac_size + self.grip_ac_size low = np.array([0.] * self.arm_ac_size + [-1.] * self.grip_ac_size) high = np.ones((self.arm_ac_size + self.grip_ac_size)) self.action_space = Box(low=low, high=high, shape=(self.n_actions, )) self.observation_space = self.env.observation_space['robot_head_rgb'] self._last_full_obs = None # For rendering
def __init__(self, name, horizon=None, gamma=0.99, history_length=4, fixed_seed=None, use_pixels=False): """ Constructor. Args: name (str): name of the environment; horizon (int, None): the horizon; gamma (float, 0.99): the discount factor; history_length (int, 4): number of frames to form a state; fixed_seed (int, None): if passed, it fixes the seed of the environment at every reset. This way, the environment is fixed rather than procedurally generated; use_pixels (bool, False): if True, MiniGrid's default 7x7x3 observations is converted to an image of resolution 56x56x3. """ # MDP creation self._not_pybullet = True self._first = True env = gym.make(name) obs_high = 10. if use_pixels: env = RGBImgPartialObsWrapper(env) # Get pixel observations obs_high = 255. env = ImgObsWrapper(env) # Get rid of the 'mission' field self.env = env self._fixed_seed = fixed_seed self._img_size = env.observation_space.shape[0:2] self._history_length = history_length # Get the default horizon if horizon is None: horizon = self.env.max_steps # MDP properties action_space = Discrete(self.env.action_space.n) observation_space = Box( low=0., high=obs_high, shape=(history_length, self._img_size[1], self._img_size[0])) self.env.max_steps = horizon + 1 # Hack to ignore gym time limit (do not use np.inf, since MiniGrid returns r(t) = 1 - 0.9t/T) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) Environment.__init__(self, mdp_info) self._state = None
def __init__(self, file_name, actuation_spec, observation_spec, gamma, horizon, n_substeps=1, n_intermediate_steps=1, additional_data_spec=None, collision_groups=None): """ Constructor. Args: file_name (string): The path to the XML file with which the environment should be created; actuation_spec (list): A list specifying the names of the joints which should be controllable by the agent. Can be left empty when all actuators should be used; observation_spec (list): A list containing the names of data that should be made available to the agent as an observation and their type (ObservationType). An entry in the list is given by: (name, type); gamma (float): The discounting factor of the environment; horizon (int): The maximum horizon for the environment; n_substeps (int): The number of substeps to use by the MuJoCo simulator. An action given by the agent will be applied for n_substeps before the agent receives the next observation and can act accordingly; n_intermediate_steps (int): The number of steps between every action taken by the agent. Similar to n_substeps but allows the user to modify, control and access intermediate states. additional_data_spec (list): A list containing the data fields of interest, which should be read from or written to during simulation. The entries are given as the following tuples: (key, name, type) key is a string for later referencing in the "read_data" and "write_data" methods. The name is the name of the object in the XML specification and the type is the ObservationType; collision_groups (list): A list containing groups of geoms for which collisions should be checked during simulation via ``check_collision``. The entries are given as: ``(key, geom_names)``, where key is a string for later referencing in the "check_collision" method, and geom_names is a list of geom names in the XML specification. """ # Create the simulation self.sim = mujoco_py.MjSim(mujoco_py.load_model_from_path(file_name), nsubsteps=n_substeps) self.n_intermediate_steps = n_intermediate_steps self.viewer = None self._state = None # Read the actuation spec and build the mapping between actions and ids # as well as their limits if len(actuation_spec) == 0: self.action_indices = [i for i in range(0, len(self.sim.model._actuator_name2id))] else: self.action_indices = [] for name in actuation_spec: self.action_indices.append(self.sim.model._actuator_name2id[name]) low = [] high = [] for index in self.action_indices: if self.sim.model.actuator_ctrllimited[index]: low.append(self.sim.model.actuator_ctrlrange[index][0]) high.append(self.sim.model.actuator_ctrlrange[index][1]) else: low.append(-np.inf) high.append(np.inf) action_space = Box(np.array(low), np.array(high)) # Read the observation spec to build a mapping at every step. It is # ensured that the values appear in the order they are specified. if len(observation_spec) == 0: raise AttributeError("No Environment observations were specified. " "Add at least one observation to the observation_spec.") else: self.observation_map = observation_spec # We can only specify limits for the joint positions, all other # information can be potentially unbounded low = [] high = [] for name, ot in self.observation_map: obs_count = len(self._observation_map(name, ot)) if obs_count == 1 and ot == ObservationType.JOINT_POS: joint_range = self.sim.model.jnt_range[self.sim.model._joint_name2id[name]] if not(joint_range[0] == joint_range[1] == 0.0): low.append(joint_range[0]) high.append(joint_range[1]) else: low.extend(-np.inf) high.extend(np.inf) else: low.extend([-np.inf] * obs_count) high.extend([np.inf] * obs_count) observation_space = Box(np.array(low), np.array(high)) # Pre-process the additional data to allow easier writing and reading # to and from arrays in MuJoCo self.additional_data = {} for key, name, ot in additional_data_spec: self.additional_data[key] = (name, ot) # Pre-process the collision groups for "fast" detection of contacts self.collision_groups = {} if self.collision_groups is not None: for name, geom_names in collision_groups: self.collision_groups[name] = {self.sim.model._geom_name2id[geom_name] for geom_name in geom_names} # Finally, we create the MDP information and call the constructor of # the parent class mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info)
from poke_env.environment.abstract_battle import AbstractBattle from poke_env.environment.battle import Battle from poke_env.player.battle_order import BattleOrder from poke_env.player.env_player import Gen8EnvSinglePlayer from poke_env.player.player import Player from poke_env.player_configuration import PlayerConfiguration import torch from poke.utils import battle_to_state_min, battle_to_state_max, battle_to_state_helper from typing import Any, Callable, Union, Optional import numpy as np import logging nactions = 13 # Default observation and action space defined from observation definition in utils.py observation_space = Box(battle_to_state_min(), battle_to_state_max()) # For gen8, max of 4 local moves, 4 dynamax moves, 5 switches = 13 actions # Does this serialization make sense? order matters and things change every time, hmm action_space = Discrete(nactions) class ShowdownEnvironment(Environment, Gen8EnvSinglePlayer): # override implementation of Gen8EnvSinglePlayer _ACTION_SPACE = list(range(13)) def __init__(self, player_configuration: Optional[PlayerConfiguration] = None, mdp_info: MDPInfo = None, embed_battle_func: Callable = battle_to_state_helper, observation_space: Union[Box, Discrete] = observation_space, action_space: Union[Box, Discrete] = action_space,
def __init__(self, files, actuation_spec, observation_spec, gamma, horizon, timestep=1 / 240, n_intermediate_steps=1, debug_gui=False, **viewer_params): """ Constructor. Args: files (list): Paths to the URDF files to load; actuation_spec (list): A list of tuples specifying the names of the joints which should be controllable by the agent and tehir control mode. Can be left empty when all actuators should be used in position control; observation_spec (list): A list containing the names of data that should be made available to the agent as an observation and their type (ObservationType). An entry in the list is given by: (name, type); gamma (float): The discounting factor of the environment; horizon (int): The maximum horizon for the environment; timestep (float, 0.00416666666): The timestep used by the PyBullet simulator; n_intermediate_steps (int): The number of steps between every action taken by the agent. Allows the user to modify, control and access intermediate states; **viewer_params: other parameters to be passed to the viewer. See PyBulletViewer documentation for the available options. """ # Store simulation parameters self._timestep = timestep self._n_intermediate_steps = n_intermediate_steps # Create the simulation and viewer if debug_gui: self._client = BulletClient(connection_mode=pybullet.GUI) else: self._client = BulletClient(connection_mode=pybullet.DIRECT) self._client.setTimeStep(self._timestep) self._client.setGravity(0, 0, -9.81) self._client.setAdditionalSearchPath(pybullet_data.getDataPath()) self._viewer = PyBulletViewer( self._client, self._timestep * self._n_intermediate_steps, **viewer_params) self._state = None # Load model and create access maps self._model_map = dict() for file_name, kwargs in files.items(): model_id = self._client.loadURDF(file_name, **kwargs) model_name = self._client.getBodyInfo(model_id)[1].decode('UTF-8') self._model_map[model_name] = model_id self._model_map.update(self._custom_load_models()) self._joint_map = dict() self._link_map = dict() for model_id in self._model_map.values(): for joint_id in range(self._client.getNumJoints(model_id)): joint_data = self._client.getJointInfo(model_id, joint_id) if joint_data[2] != pybullet.JOINT_FIXED: joint_name = joint_data[1].decode('UTF-8') self._joint_map[joint_name] = (model_id, joint_id) link_name = joint_data[12].decode('UTF-8') self._link_map[link_name] = (model_id, joint_id) # Read the actuation spec and build the mapping between actions and ids # as well as their limits assert (len(actuation_spec) > 0) self._action_data = list() for name, mode in actuation_spec: if name in self._joint_map: data = self._joint_map[name] + (mode, ) self._action_data.append(data) low, high = self._compute_action_limits() action_space = Box(np.array(low), np.array(high)) # Read the observation spec to build a mapping at every step. It is # ensured that the values appear in the order they are specified. if len(observation_spec) == 0: raise AttributeError( "No Environment observations were specified. " "Add at least one observation to the observation_spec.") self._observation_map = observation_spec self._observation_indices_map = dict() # We can only specify limits for the joint positions, all other # information can be potentially unbounded low, high = self._compute_observation_limits() observation_space = Box(low, high) # Finally, we create the MDP information and call the constructor of # the parent class mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) super().__init__(mdp_info) # Save initial state of the MDP self._initial_state = self._client.saveState()
def __init__(self, config_file, horizon=None, gamma=0.99, is_discrete=False, width=None, height=None, debug_gui=False, verbose=False): """ Constructor. Args: config_file (str): path to the YAML file specifying the task (see igibson/examples/configs/ and igibson/test/); horizon (int, None): the horizon; gamma (float, 0.99): the discount factor; is_discrete (bool, False): if True, actions are automatically discretized by iGibson's `set_up_discrete_action_space`. Please note that not all robots support discrete actions. width (int, None): width of the pixel observation. If None, the value specified in the config file is used; height (int, None): height of the pixel observation. If None, the value specified in the config file is used; debug_gui (bool, False): if True, activate the iGibson in GUI mode, showing the pybullet rendering and the robot camera. verbose (bool, False): if False, it disable iGibson default messages. """ if not verbose: logging.disable(logging.CRITICAL + 1) # Disable iGibson log messages # MDP creation self._not_pybullet = False self._first = True config = parse_config(config_file) config['is_discrete'] = is_discrete if horizon is not None: config['max_step'] = horizon else: horizon = config['max_step'] config['max_step'] = horizon + 1 # Hack to ignore gym time limit if width is not None: config['image_width'] = width if height is not None: config['image_height'] = height env = iGibsonEnv(config_file=config, mode='gui' if debug_gui else 'headless') env = iGibsonWrapper(env) self.env = env self._img_size = env.observation_space.shape[0:2] # MDP properties action_space = self.env.action_space observation_space = Box(low=0., high=255., shape=(3, self._img_size[1], self._img_size[0])) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) if isinstance(action_space, Discrete): self._convert_action = lambda a: a[0] else: self._convert_action = lambda a: a self._viewer = ImageViewer((self._img_size[1], self._img_size[0]), 1 / 60) self._image = None Environment.__init__(self, mdp_info)
def __init__(self, wrapper, config_file, base_config_file=None, horizon=None, gamma=0.99, width=None, height=None): """ Constructor. For more details on how to pass YAML configuration files, please see <MUSHROOM_RL PATH>/examples/habitat/README.md Args: wrapper (str): wrapper for converting observations and actions (e.g., HabitatRearrangeWrapper); config_file (str): path to the YAML file specifying the RL task configuration (see <HABITAT_LAB PATH>/habitat_baselines/configs/); base_config_file (str, None): path to an optional YAML file, used as 'BASE_TASK_CONFIG_PATH' in the first YAML (see <HABITAT_LAB PATH>/configs/); horizon (int, None): the horizon; gamma (float, 0.99): the discount factor; width (int, None): width of the pixel observation. If None, the value specified in the config file is used. height (int, None): height of the pixel observation. If None, the value specified in the config file is used. """ # MDP creation self._not_pybullet = False self._first = True if base_config_file is None: base_config_file = config_file config = get_config(config_paths=config_file, opts=['BASE_TASK_CONFIG_PATH', base_config_file]) config.defrost() if horizon is None: horizon = config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS # Get the default horizon config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = horizon + 1 # Hack to ignore gym time limit # Overwrite all RGB width / height used for the TASK (not SIMULATOR) for k in config['TASK_CONFIG']['SIMULATOR']: if 'rgb' in k.lower(): if height is not None: config['TASK_CONFIG']['SIMULATOR'][k]['HEIGHT'] = height if width is not None: config['TASK_CONFIG']['SIMULATOR'][k]['WIDTH'] = width config.freeze() env_class = get_env_class(config.ENV_NAME) env = make_env_fn(env_class=env_class, config=config) env = globals()[wrapper](env) self.env = env self._img_size = env.observation_space.shape[0:2] # MDP properties action_space = self.env.action_space observation_space = Box(low=0., high=255., shape=(3, self._img_size[1], self._img_size[0])) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) if isinstance(action_space, Discrete): self._convert_action = lambda a: a[0] else: self._convert_action = lambda a: a self._viewer = ImageViewer((self._img_size[1], self._img_size[0]), 1 / 10) Environment.__init__(self, mdp_info)
def __init__(self, files, actuation_spec, observation_spec, gamma, horizon, timestep=1 / 240, n_intermediate_steps=1, enforce_joint_velocity_limits=False, debug_gui=False, **viewer_params): """ Constructor. Args: files (dict): dictionary of the URDF/MJCF/SDF files to load (key) and parameters dictionary (value); actuation_spec (list): A list of tuples specifying the names of the joints which should be controllable by the agent and tehir control mode. Can be left empty when all actuators should be used in position control; observation_spec (list): A list containing the names of data that should be made available to the agent as an observation and their type (ObservationType). An entry in the list is given by: (name, type); gamma (float): The discounting factor of the environment; horizon (int): The maximum horizon for the environment; timestep (float, 0.00416666666): The timestep used by the PyBullet simulator; n_intermediate_steps (int): The number of steps between every action taken by the agent. Allows the user to modify, control and access intermediate states; enforce_joint_velocity_limits (bool, False): flag to enforce the velocity limits; debug_gui (bool, False): flag to activate the default pybullet visualizer, that can be used for debug purposes; **viewer_params: other parameters to be passed to the viewer. See PyBulletViewer documentation for the available options. """ assert len(observation_spec) > 0 assert len(actuation_spec) > 0 # Store simulation parameters self._timestep = timestep self._n_intermediate_steps = n_intermediate_steps # Create the simulation and viewer if debug_gui: self._client = BulletClient(connection_mode=pybullet.GUI) self._client.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0) else: self._client = BulletClient(connection_mode=pybullet.DIRECT) self._client.setTimeStep(self._timestep) self._client.setGravity(0, 0, -9.81) self._client.setAdditionalSearchPath(pybullet_data.getDataPath()) self._viewer = PyBulletViewer(self._client, self.dt, **viewer_params) self._state = None # Load model and create access maps self._model_map = dict() self._load_all_models(files, enforce_joint_velocity_limits) # Build utils self._indexer = IndexMap(self._client, self._model_map, actuation_spec, observation_spec) self.joints = JointsHelper(self._client, self._indexer, observation_spec) # Finally, we create the MDP information and call the constructor of the parent class action_space = Box(*self._indexer.action_limits) observation_space = Box(*self._indexer.observation_limits) mdp_info = MDPInfo(observation_space, action_space, gamma, horizon) # Let the child class modify the mdp_info data structure mdp_info = self._modify_mdp_info(mdp_info) # Provide the structure to the superclass super().__init__(mdp_info) # Save initial state of the MDP self._initial_state = self._client.saveState()