def __init__(self, gamma=0.995, horizon=1500, debug_gui=False, kine_type='analytical'):
        self.simulation = AirHockeyBase(gamma, horizon, debug_gui)
        self.kin_type = kine_type

        # [X, Y, Pitch, Yaw, Psi]
        low = np.array([0.6, -0.5]) #, np.deg2rad(-180), np.deg2rad(-10), 0])
        high = np.array([1.1, 0.5]) #, np.deg2rad(180), np.deg2rad(75), 0])
        action_space = Box(low, high)

        observation_space = self.simulation._mdp_info.observation_space
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)
        super().__init__(mdp_info)

        self.table_surface_height = 0.162
        self.iiwa_global_config = [1, -1, 1]
        self.fixed_parameters = [0., np.deg2rad(-35), 1.7]
        # self.fixed_parameters = [0., np.deg2rad(-35), 0]

        self.get_initial_state()

        if self.kin_type == 'analytical':
            self.kinematics = Kinematics()
        elif self.kin_type == 'pykdl' and KDLInterface is not None:
            self.kinematics = KDLInterface(self.simulation.robot_path, 7, 'front_base', 'F_link_ee',
                                           np.array(IIWA_JOINT_MIN_LIMITS), np.array(IIWA_JOINT_MAX_LIMITS),
                                           np.array([0., 0., -9.81]))
        else:
            raise NotImplementedError
Beispiel #2
0
    def __init__(self, size=5., goal=[2.5, 2.5], goal_radius=0.6):

        # Save important environment information
        self._size = size
        self._goal = np.array(goal)
        self._goal_radius = goal_radius

        # Create the action space.
        action_space = Discrete(4)  # 4 actions: N, S, W, E

        # Create the observation space. It's a 2D box of dimension (size x size).
        # You can also specify low and high array, if every component has different limits
        shape = (2,)
        observation_space = Box(0, size, shape)

        # Create the MDPInfo structure, needed by the environment interface
        mdp_info = MDPInfo(observation_space, action_space, gamma=0.99, horizon=100)

        super().__init__(mdp_info)

        # Create a state class variable to store the current state
        self._state = None

        # Create the viewer
        self._viewer = Viewer(size, size)
    def __init__(self,
                 start=None,
                 goal=None,
                 goal_threshold=.1,
                 noise_step=.025,
                 noise_reward=0,
                 reward_goal=0.,
                 thrust=.05,
                 puddle_center=None,
                 puddle_width=None,
                 gamma=.99,
                 horizon=5000):
        """
        Constructor.

        Args:
            start (np.array, None): starting position of the agent;
            goal (np.array, None): goal position;
            goal_threshold (float, .1): distance threshold of the agent from the
                goal to consider it reached;
            noise_step (float, .025): noise in actions;
            noise_reward (float, 0): standard deviation of gaussian noise in reward;
            reward_goal (float, 0): reward obtained reaching goal state;
            thrust (float, .05): distance walked during each action;
            puddle_center (np.array, None): center of the puddle;
            puddle_width (np.array, None): width of the puddle;
            gamma (float, .99): discount factor.
            horizon (int, 5000): horizon of the problem;

        """
        # MDP parameters
        self._start = np.array([.2, .4]) if start is None else start
        self._goal = np.array([1., 1.]) if goal is None else goal
        self._goal_threshold = goal_threshold
        self._noise_step = noise_step
        self._noise_reward = noise_reward
        self._reward_goal = reward_goal
        self._thrust = thrust
        puddle_center = [[.3, .6], [.4, .5], [.8, .9]
                         ] if puddle_center is None else puddle_center
        self._puddle_center = [np.array(center) for center in puddle_center]
        puddle_width = [[.1, .03], [.03, .1], [.03, .1]
                        ] if puddle_width is None else puddle_width
        self._puddle_width = [np.array(width) for width in puddle_width]

        self._actions = [np.zeros(2) for _ in range(5)]
        for i in range(4):
            self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1)

        # MDP properties
        action_space = Discrete(5)
        observation_space = Box(0., 1., shape=(2, ))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._pixels = None
        self._viewer = Viewer(1.0, 1.0)

        super().__init__(mdp_info)
Beispiel #4
0
 def _modify_mdp_info(self, mdp_info):
     obs_idx = [0, 1, 2, 7, 8, 9, 13, 14, 15, 16, 17, 18]
     obs_low = mdp_info.observation_space.low[obs_idx]
     obs_high = mdp_info.observation_space.high[obs_idx]
     obs_low[0:3] = [-1, -0.5, -np.pi]
     obs_high[0:3] = [1, 0.5, np.pi]
     observation_space = Box(low=obs_low, high=obs_high)
     return MDPInfo(observation_space, mdp_info.action_space,
                    mdp_info.gamma, mdp_info.horizon)
Beispiel #5
0
 def __init__(self, env):
     gym.Wrapper.__init__(self, env)
     self.arm_ac_size = env.action_space['ARM_ACTION']['arm_action'].shape[
         0]
     self.grip_ac_size = env.action_space['ARM_ACTION'][
         'grip_action'].shape[0]
     self.n_actions = self.arm_ac_size + self.grip_ac_size
     low = np.array([0.] * self.arm_ac_size + [-1.] * self.grip_ac_size)
     high = np.ones((self.arm_ac_size + self.grip_ac_size))
     self.action_space = Box(low=low, high=high, shape=(self.n_actions, ))
     self.observation_space = self.env.observation_space['robot_head_rgb']
     self._last_full_obs = None  # For rendering
Beispiel #6
0
    def __init__(self, name, horizon=None, gamma=0.99, history_length=4,
                 fixed_seed=None, use_pixels=False):
        """
        Constructor.

        Args:
             name (str): name of the environment;
             horizon (int, None): the horizon;
             gamma (float, 0.99): the discount factor;
             history_length (int, 4): number of frames to form a state;
             fixed_seed (int, None): if passed, it fixes the seed of the
                environment at every reset. This way, the environment is fixed
                rather than procedurally generated;
             use_pixels (bool, False): if True, MiniGrid's default 7x7x3
                observations is converted to an image of resolution 56x56x3.

        """
        # MDP creation
        self._not_pybullet = True
        self._first = True

        env = gym.make(name)
        obs_high = 10.
        if use_pixels:
            env = RGBImgPartialObsWrapper(env) # Get pixel observations
            obs_high = 255.
        env = ImgObsWrapper(env) # Get rid of the 'mission' field
        self.env = env

        self._fixed_seed = fixed_seed

        self._img_size = env.observation_space.shape[0:2]
        self._history_length = history_length

        # Get the default horizon
        if horizon is None:
            horizon = self.env.max_steps

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(
            low=0., high=obs_high, shape=(history_length, self._img_size[1], self._img_size[0]))
        self.env.max_steps = horizon + 1 # Hack to ignore gym time limit (do not use np.inf, since MiniGrid returns r(t) = 1 - 0.9t/T)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        Environment.__init__(self, mdp_info)

        self._state = None
Beispiel #7
0
    def __init__(self, file_name, actuation_spec, observation_spec, gamma,
                 horizon, n_substeps=1, n_intermediate_steps=1, additional_data_spec=None,
                 collision_groups=None):
        """
        Constructor.

        Args:
            file_name (string): The path to the XML file with which the
                environment should be created;
            actuation_spec (list): A list specifying the names of the joints
                which should be controllable by the agent. Can be left empty
                when all actuators should be used;
            observation_spec (list): A list containing the names of data that
                should be made available to the agent as an observation and
                their type (ObservationType). An entry in the list is given by:
                (name, type);
            gamma (float): The discounting factor of the environment;
            horizon (int): The maximum horizon for the environment;
            n_substeps (int): The number of substeps to use by the MuJoCo
                simulator. An action given by the agent will be applied for
                n_substeps before the agent receives the next observation and
                can act accordingly;
            n_intermediate_steps (int): The number of steps between every action
                taken by the agent. Similar to n_substeps but allows the user
                to modify, control and access intermediate states.
            additional_data_spec (list): A list containing the data fields of
                interest, which should be read from or written to during
                simulation. The entries are given as the following tuples:
                (key, name, type) key is a string for later referencing in the
                "read_data" and "write_data" methods. The name is the name of
                the object in the XML specification and the type is the
                ObservationType;
            collision_groups (list): A list containing groups of geoms for
                which collisions should be checked during simulation via
                ``check_collision``. The entries are given as:
                ``(key, geom_names)``, where key is a string for later
                referencing in the "check_collision" method, and geom_names is
                a list of geom names in the XML specification.
        """
        # Create the simulation
        self.sim = mujoco_py.MjSim(mujoco_py.load_model_from_path(file_name),
                                   nsubsteps=n_substeps)

        self.n_intermediate_steps = n_intermediate_steps
        self.viewer = None
        self._state = None

        # Read the actuation spec and build the mapping between actions and ids
        # as well as their limits
        if len(actuation_spec) == 0:
            self.action_indices = [i for i in range(0, len(self.sim.model._actuator_name2id))]
        else:
            self.action_indices = []
            for name in actuation_spec:
                self.action_indices.append(self.sim.model._actuator_name2id[name])

        low = []
        high = []
        for index in self.action_indices:
            if self.sim.model.actuator_ctrllimited[index]:
                low.append(self.sim.model.actuator_ctrlrange[index][0])
                high.append(self.sim.model.actuator_ctrlrange[index][1])
            else:
                low.append(-np.inf)
                high.append(np.inf)
        action_space = Box(np.array(low), np.array(high))

        # Read the observation spec to build a mapping at every step. It is
        # ensured that the values appear in the order they are specified.
        if len(observation_spec) == 0:
            raise AttributeError("No Environment observations were specified. "
                                 "Add at least one observation to the observation_spec.")
        else:
            self.observation_map = observation_spec

        # We can only specify limits for the joint positions, all other
        # information can be potentially unbounded
        low = []
        high = []
        for name, ot in self.observation_map:
            obs_count = len(self._observation_map(name, ot))
            if obs_count == 1 and ot == ObservationType.JOINT_POS:
                joint_range = self.sim.model.jnt_range[self.sim.model._joint_name2id[name]]
                if not(joint_range[0] == joint_range[1] == 0.0):
                    low.append(joint_range[0])
                    high.append(joint_range[1])
                else:
                    low.extend(-np.inf)
                    high.extend(np.inf)
            else:
                low.extend([-np.inf] * obs_count)
                high.extend([np.inf] * obs_count)
        observation_space = Box(np.array(low), np.array(high))

        # Pre-process the additional data to allow easier writing and reading
        # to and from arrays in MuJoCo
        self.additional_data = {}
        for key, name, ot in additional_data_spec:
            self.additional_data[key] = (name, ot)

        # Pre-process the collision groups for "fast" detection of contacts
        self.collision_groups = {}
        if self.collision_groups is not None:
            for name, geom_names in collision_groups:
                self.collision_groups[name] = {self.sim.model._geom_name2id[geom_name]
                                               for geom_name in geom_names}

        # Finally, we create the MDP information and call the constructor of
        # the parent class
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)
        super().__init__(mdp_info)
Beispiel #8
0
from poke_env.environment.abstract_battle import AbstractBattle
from poke_env.environment.battle import Battle
from poke_env.player.battle_order import BattleOrder
from poke_env.player.env_player import Gen8EnvSinglePlayer
from poke_env.player.player import Player
from poke_env.player_configuration import PlayerConfiguration
import torch
from poke.utils import battle_to_state_min, battle_to_state_max, battle_to_state_helper
from typing import Any, Callable, Union, Optional
import numpy as np
import logging

nactions = 13

# Default observation and action space defined from observation definition in utils.py
observation_space = Box(battle_to_state_min(), battle_to_state_max())
# For gen8, max of 4 local moves, 4 dynamax moves, 5 switches = 13 actions
# Does this serialization make sense? order matters and things change every time, hmm
action_space = Discrete(nactions)


class ShowdownEnvironment(Environment, Gen8EnvSinglePlayer):
    # override implementation of Gen8EnvSinglePlayer
    _ACTION_SPACE = list(range(13))

    def __init__(self,
                 player_configuration: Optional[PlayerConfiguration] = None,
                 mdp_info: MDPInfo = None,
                 embed_battle_func: Callable = battle_to_state_helper,
                 observation_space: Union[Box, Discrete] = observation_space,
                 action_space: Union[Box, Discrete] = action_space,
Beispiel #9
0
    def __init__(self,
                 files,
                 actuation_spec,
                 observation_spec,
                 gamma,
                 horizon,
                 timestep=1 / 240,
                 n_intermediate_steps=1,
                 debug_gui=False,
                 **viewer_params):
        """
        Constructor.

        Args:
            files (list): Paths to the URDF files to load;
            actuation_spec (list): A list of tuples specifying the names of the
                joints which should be controllable by the agent and tehir control mode.
                 Can be left empty when all actuators should be used in position control;
            observation_spec (list): A list containing the names of data that
                should be made available to the agent as an observation and
                their type (ObservationType). An entry in the list is given by:
                (name, type);
            gamma (float): The discounting factor of the environment;
            horizon (int): The maximum horizon for the environment;
            timestep (float, 0.00416666666): The timestep used by the PyBullet
                simulator;
            n_intermediate_steps (int): The number of steps between every action
                taken by the agent. Allows the user to modify, control and
                access intermediate states;
            **viewer_params: other parameters to be passed to the viewer.
                See PyBulletViewer documentation for the available options.

        """

        # Store simulation parameters
        self._timestep = timestep
        self._n_intermediate_steps = n_intermediate_steps

        # Create the simulation and viewer
        if debug_gui:
            self._client = BulletClient(connection_mode=pybullet.GUI)
        else:
            self._client = BulletClient(connection_mode=pybullet.DIRECT)
        self._client.setTimeStep(self._timestep)
        self._client.setGravity(0, 0, -9.81)
        self._client.setAdditionalSearchPath(pybullet_data.getDataPath())

        self._viewer = PyBulletViewer(
            self._client, self._timestep * self._n_intermediate_steps,
            **viewer_params)
        self._state = None

        # Load model and create access maps
        self._model_map = dict()
        for file_name, kwargs in files.items():
            model_id = self._client.loadURDF(file_name, **kwargs)
            model_name = self._client.getBodyInfo(model_id)[1].decode('UTF-8')
            self._model_map[model_name] = model_id
        self._model_map.update(self._custom_load_models())

        self._joint_map = dict()
        self._link_map = dict()
        for model_id in self._model_map.values():
            for joint_id in range(self._client.getNumJoints(model_id)):
                joint_data = self._client.getJointInfo(model_id, joint_id)
                if joint_data[2] != pybullet.JOINT_FIXED:
                    joint_name = joint_data[1].decode('UTF-8')
                    self._joint_map[joint_name] = (model_id, joint_id)
                link_name = joint_data[12].decode('UTF-8')
                self._link_map[link_name] = (model_id, joint_id)

        # Read the actuation spec and build the mapping between actions and ids
        # as well as their limits
        assert (len(actuation_spec) > 0)
        self._action_data = list()
        for name, mode in actuation_spec:
            if name in self._joint_map:
                data = self._joint_map[name] + (mode, )
                self._action_data.append(data)

        low, high = self._compute_action_limits()
        action_space = Box(np.array(low), np.array(high))

        # Read the observation spec to build a mapping at every step. It is
        # ensured that the values appear in the order they are specified.
        if len(observation_spec) == 0:
            raise AttributeError(
                "No Environment observations were specified. "
                "Add at least one observation to the observation_spec.")

        self._observation_map = observation_spec
        self._observation_indices_map = dict()

        # We can only specify limits for the joint positions, all other
        # information can be potentially unbounded
        low, high = self._compute_observation_limits()
        observation_space = Box(low, high)

        # Finally, we create the MDP information and call the constructor of
        # the parent class
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)
        super().__init__(mdp_info)

        # Save initial state of the MDP
        self._initial_state = self._client.saveState()
Beispiel #10
0
    def __init__(self,
                 config_file,
                 horizon=None,
                 gamma=0.99,
                 is_discrete=False,
                 width=None,
                 height=None,
                 debug_gui=False,
                 verbose=False):
        """
        Constructor.

        Args:
             config_file (str): path to the YAML file specifying the task
                (see igibson/examples/configs/ and igibson/test/);
             horizon (int, None): the horizon;
             gamma (float, 0.99): the discount factor;
             is_discrete (bool, False): if True, actions are automatically
                discretized by iGibson's `set_up_discrete_action_space`.
                Please note that not all robots support discrete actions.
             width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used;
             height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used;
             debug_gui (bool, False): if True, activate the iGibson in GUI mode,
                showing the pybullet rendering and the robot camera.
             verbose (bool, False): if False, it disable iGibson default messages.

        """

        if not verbose:
            logging.disable(logging.CRITICAL +
                            1)  # Disable iGibson log messages

        # MDP creation
        self._not_pybullet = False
        self._first = True

        config = parse_config(config_file)
        config['is_discrete'] = is_discrete

        if horizon is not None:
            config['max_step'] = horizon
        else:
            horizon = config['max_step']
            config['max_step'] = horizon + 1  # Hack to ignore gym time limit

        if width is not None:
            config['image_width'] = width
        if height is not None:
            config['image_height'] = height

        env = iGibsonEnv(config_file=config,
                         mode='gui' if debug_gui else 'headless')
        env = iGibsonWrapper(env)

        self.env = env

        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(3, self._img_size[1],
                                       self._img_size[0]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 60)
        self._image = None

        Environment.__init__(self, mdp_info)
Beispiel #11
0
    def __init__(self,
                 wrapper,
                 config_file,
                 base_config_file=None,
                 horizon=None,
                 gamma=0.99,
                 width=None,
                 height=None):
        """
        Constructor. For more details on how to pass YAML configuration files,
        please see <MUSHROOM_RL PATH>/examples/habitat/README.md

        Args:
             wrapper (str): wrapper for converting observations and actions
                (e.g., HabitatRearrangeWrapper);
             config_file (str): path to the YAML file specifying the RL task
                configuration (see <HABITAT_LAB PATH>/habitat_baselines/configs/);
             base_config_file (str, None): path to an optional YAML file, used
                as 'BASE_TASK_CONFIG_PATH' in the first YAML
                (see <HABITAT_LAB PATH>/configs/);
             horizon (int, None): the horizon;
             gamma (float, 0.99): the discount factor;
             width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used.
             height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used.

        """
        # MDP creation
        self._not_pybullet = False
        self._first = True

        if base_config_file is None:
            base_config_file = config_file

        config = get_config(config_paths=config_file,
                            opts=['BASE_TASK_CONFIG_PATH', base_config_file])

        config.defrost()

        if horizon is None:
            horizon = config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS  # Get the default horizon
        config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = horizon + 1  # Hack to ignore gym time limit

        # Overwrite all RGB width / height used for the TASK (not SIMULATOR)
        for k in config['TASK_CONFIG']['SIMULATOR']:
            if 'rgb' in k.lower():
                if height is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['HEIGHT'] = height
                if width is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['WIDTH'] = width

        config.freeze()

        env_class = get_env_class(config.ENV_NAME)
        env = make_env_fn(env_class=env_class, config=config)
        env = globals()[wrapper](env)
        self.env = env

        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(3, self._img_size[1],
                                       self._img_size[0]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 10)

        Environment.__init__(self, mdp_info)
Beispiel #12
0
    def __init__(self,
                 files,
                 actuation_spec,
                 observation_spec,
                 gamma,
                 horizon,
                 timestep=1 / 240,
                 n_intermediate_steps=1,
                 enforce_joint_velocity_limits=False,
                 debug_gui=False,
                 **viewer_params):
        """
        Constructor.

        Args:
            files (dict): dictionary of the URDF/MJCF/SDF files to load (key) and parameters dictionary (value);
            actuation_spec (list): A list of tuples specifying the names of the
                joints which should be controllable by the agent and tehir control mode.
                Can be left empty when all actuators should be used in position control;
            observation_spec (list): A list containing the names of data that
                should be made available to the agent as an observation and
                their type (ObservationType). An entry in the list is given by:
                (name, type);
            gamma (float): The discounting factor of the environment;
            horizon (int): The maximum horizon for the environment;
            timestep (float, 0.00416666666): The timestep used by the PyBullet
                simulator;
            n_intermediate_steps (int): The number of steps between every action
                taken by the agent. Allows the user to modify, control and
                access intermediate states;
            enforce_joint_velocity_limits (bool, False): flag to enforce the velocity limits;
            debug_gui (bool, False): flag to activate the default pybullet visualizer, that can be used for debug
                purposes;
            **viewer_params: other parameters to be passed to the viewer.
                See PyBulletViewer documentation for the available options.

        """
        assert len(observation_spec) > 0
        assert len(actuation_spec) > 0

        # Store simulation parameters
        self._timestep = timestep
        self._n_intermediate_steps = n_intermediate_steps

        # Create the simulation and viewer
        if debug_gui:
            self._client = BulletClient(connection_mode=pybullet.GUI)
            self._client.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
        else:
            self._client = BulletClient(connection_mode=pybullet.DIRECT)
        self._client.setTimeStep(self._timestep)
        self._client.setGravity(0, 0, -9.81)
        self._client.setAdditionalSearchPath(pybullet_data.getDataPath())

        self._viewer = PyBulletViewer(self._client, self.dt, **viewer_params)
        self._state = None

        # Load model and create access maps
        self._model_map = dict()
        self._load_all_models(files, enforce_joint_velocity_limits)

        # Build utils
        self._indexer = IndexMap(self._client, self._model_map, actuation_spec,
                                 observation_spec)
        self.joints = JointsHelper(self._client, self._indexer,
                                   observation_spec)

        # Finally, we create the MDP information and call the constructor of the parent class
        action_space = Box(*self._indexer.action_limits)

        observation_space = Box(*self._indexer.observation_limits)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Let the child class modify the mdp_info data structure
        mdp_info = self._modify_mdp_info(mdp_info)

        # Provide the structure to the superclass
        super().__init__(mdp_info)

        # Save initial state of the MDP
        self._initial_state = self._client.saveState()