Example #1
    def __init__(self, p, rew, mu=None, gamma=.9, horizon=np.inf):
        """
        Constructor.

        Args:
            p (np.ndarray): transition probability matrix;
            rew (np.ndarray): reward matrix;
            mu (np.ndarray, None): initial state probability distribution;
            gamma (float, .9): discount factor;
            horizon (int, np.inf): the horizon.

        """
        assert p.shape == rew.shape
        assert mu is None or p.shape[0] == mu.size

        # MDP parameters
        self.p = p
        self.r = rew
        self.mu = mu

        # MDP properties
        observation_space = spaces.Discrete(p.shape[0])
        action_space = spaces.Discrete(p.shape[1])
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
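
A minimal usage sketch for this constructor, assuming it belongs to MushroomRL's FiniteMDP class (the surrounding code uses MushroomRL's MDPInfo and spaces); the matrices below are illustrative:

import numpy as np
from mushroom_rl.environments import FiniteMDP

# Two states, two actions: p[s, a, s'] is the probability of reaching
# state s' when taking action a in state s.
p = np.array([[[.9, .1], [.1, .9]],
              [[.8, .2], [.2, .8]]])
# rew must have the same shape as p; reward 1 for landing in state 1.
rew = np.zeros((2, 2, 2))
rew[:, :, 1] = 1.
mu = np.array([1., 0.])  # always start in state 0

mdp = FiniteMDP(p, rew, mu=mu, gamma=.9)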
Example #2
    def __init__(self, size=5., goal=(2.5, 2.5), goal_radius=0.6):

        # Save important environment information
        self._size = size
        self._goal = np.array(goal)
        self._goal_radius = goal_radius

        # Create the action space.
        action_space = Discrete(4)  # 4 actions: N, S, W, E

        # Create the observation space. It's a 2D box of dimension (size x size).
        # You can also pass low and high arrays if each component has different limits.
        shape = (2,)
        observation_space = Box(0, size, shape)

        # Create the MDPInfo structure, needed by the environment interface
        mdp_info = MDPInfo(observation_space, action_space, gamma=0.99, horizon=100)

        super().__init__(mdp_info)

        # Create a state class variable to store the current state
        self._state = None

        # Create the viewer
        self._viewer = Viewer(size, size)
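
A usage sketch for this tutorial-style constructor; RoomEnv is a hypothetical name for the class it belongs to:

import numpy as np

env = RoomEnv(size=5., goal=(2.5, 2.5), goal_radius=0.6)
state = env.reset()     # Environment interface: returns the initial state
action = np.array([0])  # one of the 4 discrete actions (N, S, W, E)
state, reward, absorbing, info = env.step(action)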
Example #3
    def __init__(self, horizon=100, gamma=.95):
        """
        Constructor.

        Args:
            horizon (int, 100): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self.max_pos = 1.
        self.max_velocity = 3.
        high = np.array([self.max_pos, self.max_velocity])
        self._g = 9.81
        self._m = 1.
        self._dt = .1
        self._discrete_actions = [-4., 4.]

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(2)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(1, 1)

        super().__init__(mdp_info)
Example #4
    def __init__(self, **kwargs):

        # Define environment properties
        high_x = np.array([5.0, 5.0, np.pi])
        low_x = -high_x

        high_u = np.array([1.0, 3.0])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)

        gamma = 0.9
        horizon = 400

        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        hz = 10.0

        super(TurtlebotGazebo, self).__init__('turtlebot_gazebo', mdp_info, hz,
                                              **kwargs)

        # create a publisher on the /cmd_vel topic to send velocity setpoints
        self._pub = rospy.Publisher('/cmd_vel', Twist, queue_size=1)

        # connect to the /gazebo/get_model_state service to query the
        # position of the turtlebot
        model_state_service_name = '/gazebo/get_model_state'
        rospy.wait_for_service(model_state_service_name)
        self._model_state_service = rospy.ServiceProxy(
            model_state_service_name, GetModelState)
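
A hypothetical sketch of how these handles might be used by the step logic (the actual step method is not shown here):

from geometry_msgs.msg import Twist

def apply_action(pub, action):
    # publish the 2D action as a velocity setpoint on /cmd_vel
    msg = Twist()
    msg.linear.x = action[0]   # forward velocity
    msg.angular.z = action[1]  # yaw rate
    pub.publish(msg)

def read_pose(model_state_service):
    # query Gazebo for the current robot pose
    res = model_state_service('turtlebot3', '')  # model name is an assumption
    return res.pose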
Example #5
    def __init__(self,
                 start=None,
                 goal=None,
                 goal_threshold=.1,
                 noise_step=.025,
                 noise_reward=0,
                 reward_goal=0.,
                 thrust=.05,
                 puddle_center=None,
                 puddle_width=None,
                 gamma=.99,
                 horizon=5000):
        """
        Constructor.

        Args:
            start (np.array, None): starting position of the agent;
            goal (np.array, None): goal position;
            goal_threshold (float, .1): distance threshold of the agent from the
                goal to consider it reached;
            noise_step (float, .025): noise in actions;
            noise_reward (float, 0): standard deviation of gaussian noise in reward;
            reward_goal (float, 0): reward obtained reaching goal state;
            thrust (float, .05): distance walked during each action;
            puddle_center (np.array, None): center of the puddle;
            puddle_width (np.array, None): width of the puddle;
            gamma (float, .99): discount factor;
            horizon (int, 5000): horizon of the problem.

        """
        # MDP parameters
        self._start = np.array([.2, .4]) if start is None else start
        self._goal = np.array([1., 1.]) if goal is None else goal
        self._goal_threshold = goal_threshold
        self._noise_step = noise_step
        self._noise_reward = noise_reward
        self._reward_goal = reward_goal
        self._thrust = thrust
        if puddle_center is None:
            puddle_center = [[.3, .6], [.4, .5], [.8, .9]]
        self._puddle_center = [np.array(center) for center in puddle_center]
        if puddle_width is None:
            puddle_width = [[.1, .03], [.03, .1], [.03, .1]]
        self._puddle_width = [np.array(width) for width in puddle_width]

        # Build the action set: four axis-aligned moves of length `thrust`
        # plus a fifth no-op action.
        self._actions = [np.zeros(2) for _ in range(5)]
        for i in range(4):
            # i // 2 selects the axis (0: x, 1: y);
            # i % 2 * 2 - 1 selects the direction (-1 or +1)
            self._actions[i][i // 2] = thrust * (i % 2 * 2 - 1)

        # MDP properties
        action_space = Discrete(5)
        observation_space = Box(0., 1., shape=(2, ))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._pixels = None
        self._viewer = Viewer(1.0, 1.0)

        super().__init__(mdp_info)
Example #6
    def __init__(self, height=3, width=3, goal=(0, 2), start=(2, 0)):
        # MDP properties
        observation_space = spaces.Discrete(height * width)
        action_space = spaces.Discrete(4)
        horizon = np.inf
        gamma = .95
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info, height, width, start, goal)
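
Assuming this is MushroomRL's GridWorld environment, it can be instantiated as follows:

from mushroom_rl.environments import GridWorld

env = GridWorld(height=3, width=3, goal=(0, 2), start=(2, 0))
state = env.reset()  # a flat index into the height * width discrete space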
Example #7
    def _modify_mdp_info(self, mdp_info):
        obs_idx = [0, 1, 2, 7, 8, 9, 13, 14, 15, 16, 17, 18]
        obs_low = mdp_info.observation_space.low[obs_idx]
        obs_high = mdp_info.observation_space.high[obs_idx]
        obs_low[0:3] = [-1, -0.5, -np.pi]
        obs_high[0:3] = [1, 0.5, np.pi]
        observation_space = Box(low=obs_low, high=obs_high)
        return MDPInfo(observation_space, mdp_info.action_space,
                       mdp_info.gamma, mdp_info.horizon)
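
This override keeps only the 12 observation components listed in obs_idx and tightens the bounds of the first three (presumably x, y and yaw) before rebuilding the MDPInfo; a matching filter over the same indices is likely applied to each raw observation elsewhere in the class.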
Example #8
    def __init__(self,
                 name,
                 width=84,
                 height=84,
                 ends_at_life=False,
                 max_pooling=True,
                 history_length=4,
                 max_no_op_actions=30):
        """
        Constructor.

        Args:
            name (str): id name of the Atari game in Gym;
            width (int, 84): width of the screen;
            height (int, 84): height of the screen;
            ends_at_life (bool, False): whether the episode ends when a life is
               lost or not;
            max_pooling (bool, True): whether to do max-pooling or
                average-pooling of the last two frames when using NoFrameskip;
            history_length (int, 4): number of frames to form a state;
            max_no_op_actions (int, 30): maximum number of no-op action to
                execute at the beginning of an episode.

        """
        # MDP creation
        if 'NoFrameskip' in name:
            self.env = MaxAndSkip(gym.make(name), history_length, max_pooling)
        else:
            self.env = gym.make(name)

        # MDP parameters
        self._img_size = (width, height)
        self._episode_ends_at_life = ends_at_life
        self._max_lives = self.env.unwrapped.ale.lives()
        self._lives = self._max_lives
        self._force_fire = None
        self._real_reset = True
        self._max_no_op_actions = max_no_op_actions
        self._history_length = history_length
        self._current_no_op = None

        assert self.env.unwrapped.get_action_meanings()[0] == 'NOOP'

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(history_length, self._img_size[1],
                                       self._img_size[0]))
        horizon = np.inf  # the gym time limit is used.
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
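
A usage sketch, assuming this is MushroomRL's Atari environment (the game id follows the Gym naming scheme):

from mushroom_rl.environments import Atari

# A NoFrameskip id triggers the MaxAndSkip wrapper shown above.
env = Atari('BreakoutNoFrameskip-v4', width=84, height=84,
            ends_at_life=True, history_length=4)
state = env.reset()  # a stack of history_length 84x84 frames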
Example #9
    def __init__(self,
                 A,
                 B,
                 Q,
                 R,
                 max_pos=np.inf,
                 max_action=np.inf,
                 random_init=False,
                 episodic=False,
                 gamma=0.9,
                 horizon=50,
                 initial_state=None):
        """
        Constructor.

        Args:
            A (np.ndarray): the state dynamics matrix;
            B (np.ndarray): the action dynamics matrix;
            Q (np.ndarray): reward weight matrix for state;
            R (np.ndarray): reward weight matrix for action;
            max_pos (float, np.inf): maximum value of the state;
            max_action (float, np.inf): maximum value of the action;
            random_init (bool, False): start from a random state;
            episodic (bool, False): end the episode when the state goes over
                the threshold;
            gamma (float, 0.9): discount factor;
            horizon (int, 50): horizon of the mdp;
            initial_state (np.ndarray, None): the initial state of the system.

        """
        self.A = A
        self.B = B
        self.Q = Q
        self.R = R
        self._max_pos = max_pos
        self._max_action = max_action
        self._episodic = episodic
        self.random_init = random_init

        self._initial_state = initial_state

        # MDP properties
        high_x = self._max_pos * np.ones(A.shape[0])
        low_x = -high_x

        high_u = self._max_action * np.ones(B.shape[1])
        low_u = -high_u

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        super().__init__(mdp_info)
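
A usage sketch, assuming this is MushroomRL's LQR environment; a 2D linear system with identity dynamics and illustrative cost weights:

import numpy as np
from mushroom_rl.environments import LQR

A = np.eye(2)       # state dynamics
B = np.eye(2)       # action dynamics
Q = np.eye(2)       # state cost weights
R = .1 * np.eye(2)  # action cost weights

env = LQR(A, B, Q, R, max_pos=10., max_action=5., horizon=50)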
Example #10
    def __init__(self, name, horizon=None, gamma=0.99, history_length=4,
                 fixed_seed=None, use_pixels=False):
        """
        Constructor.

        Args:
             name (str): name of the environment;
             horizon (int, None): the horizon;
             gamma (float, 0.99): the discount factor;
             history_length (int, 4): number of frames to form a state;
             fixed_seed (int, None): if passed, it fixes the seed of the
                environment at every reset. This way, the environment is fixed
                rather than procedurally generated;
             use_pixels (bool, False): if True, MiniGrid's default 7x7x3
                observation is converted to an image of resolution 56x56x3.

        """
        # MDP creation
        self._not_pybullet = True
        self._first = True

        env = gym.make(name)
        obs_high = 10.
        if use_pixels:
            env = RGBImgPartialObsWrapper(env) # Get pixel observations
            obs_high = 255.
        env = ImgObsWrapper(env) # Get rid of the 'mission' field
        self.env = env

        self._fixed_seed = fixed_seed

        self._img_size = env.observation_space.shape[0:2]
        self._history_length = history_length

        # Get the default horizon
        if horizon is None:
            horizon = self.env.max_steps

        # MDP properties
        action_space = Discrete(self.env.action_space.n)
        observation_space = Box(
            low=0., high=obs_high, shape=(history_length, self._img_size[1], self._img_size[0]))
        # Hack to ignore the gym time limit (do not use np.inf, since MiniGrid
        # returns r(t) = 1 - 0.9t/T)
        self.env.max_steps = horizon + 1
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        Environment.__init__(self, mdp_info)

        self._state = None
Example #11
    def __init__(self,
                 m=2.,
                 M=8.,
                 l=.5,
                 g=9.8,
                 mu=1e-2,
                 max_u=50.,
                 noise_u=10.,
                 horizon=3000,
                 gamma=.95):
        """
        Constructor.

        Args:
            m (float, 2.0): mass of the pendulum;
            M (float, 8.0): mass of the cart;
            l (float, .5): length of the pendulum;
            g (float, 9.8): gravity acceleration constant;
            mu (float, 1e-2): friction constant;
            max_u (float, 50.): maximum allowed input torque;
            noise_u (float, 10.): maximum noise on the action;
            horizon (int, 3000): horizon of the problem;
            gamma (float, .95): discount factor.

        """
        # MDP parameters
        self._m = m
        self._M = M
        self._l = l
        self._g = g
        self._alpha = 1 / (self._m + self._M)
        self._mu = mu
        self._dt = .1
        self._max_u = max_u
        self._noise_u = noise_u
        high = np.array([np.inf, np.inf])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Discrete(3)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(2.5 * l, 2.5 * l)
        self._last_u = None
        self._state = None

        super().__init__(mdp_info)
Example #12
    def __init__(self, small=True, n_steps_action=3):
        """
        Constructor.

        Args:
             small (bool, True): whether to use a small state space or not;
             n_steps_action (int, 3): number of integration intervals for each
                                      step of the mdp.

        """
        # MDP parameters
        self.field_size = 150 if small else 1000
        low = np.array([0, 0, -np.pi, -np.pi / 12.])
        high = np.array([self.field_size, self.field_size, np.pi, np.pi / 12.])
        self.omega_max = np.array([np.pi / 12.])
        self._v = 3.
        self._T = 5.
        self._dt = .2
        self._gate_s = np.empty(2)
        self._gate_e = np.empty(2)
        self._gate_s[0] = 100 if small else 350
        self._gate_s[1] = 120 if small else 400
        self._gate_e[0] = 120 if small else 450
        self._gate_e[1] = 100 if small else 400
        self._out_reward = -100
        self._success_reward = 0
        self._small = small
        self._state = None
        self.n_steps_action = n_steps_action

        # MDP properties
        observation_space = spaces.Box(low=low, high=high)
        action_space = spaces.Box(low=-self.omega_max, high=self.omega_max)
        horizon = 5000
        gamma = .99
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(self.field_size, self.field_size,
                              background=(66, 131, 237))

        super().__init__(mdp_info)
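
Assuming this is MushroomRL's ShipSteering environment, instantiation is straightforward:

from mushroom_rl.environments import ShipSteering

# small=True gives the 150x150 field with the gate between (100, 120)
# and (120, 100).
env = ShipSteering(small=True, n_steps_action=3)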
Example #13
    def __init__(self, **kwargs):

        # Define environment properties
        high_x = np.array([11.088889122009277, 11.088889122009277, np.pi])
        low_x = np.array([0, 0, -np.pi])

        high_u = np.array([2.0, 2.0])
        low_u = np.array([0., -2.0])

        observation_space = spaces.Box(low=low_x, high=high_x)
        action_space = spaces.Box(low=low_u, high=high_u)

        gamma = 0.9
        horizon = 100

        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        hz = 10.0

        self._target = np.array([5.0, 5.0])
        self._current_pose = np.zeros(3)

        super().__init__('turtlesim_env', mdp_info, hz, **kwargs)

        # create a publisher on the /turtle1/cmd_vel topic to send velocity
        # setpoints
        self._cmd = rospy.Publisher('/turtle1/cmd_vel', Twist, queue_size=1)

        # subscribe to /turtle1/pose to get the turtle pose
        self._pose = rospy.Subscriber('/turtle1/pose', Pose,
                                      self._pose_callback)

        # connect to the teleporting service to reset environment
        self._reset_service_name = '/turtle1/teleport_absolute'
        rospy.wait_for_service(self._reset_service_name)
        self._reset_service = rospy.ServiceProxy(self._reset_service_name,
                                                 TeleportAbsolute)

        # connect to the clear service to cleanup the traces
        self._clear_service_name = '/clear'
        rospy.wait_for_service(self._clear_service_name)
        self._clear_service = rospy.ServiceProxy(self._clear_service_name,
                                                 Empty)
Example #14
    def __init__(self, random_start=False):
        """
        Constructor.

        Args:
            random_start (bool, False): whether to start from a random position
                or from the horizontal one.

        """
        # MDP parameters
        gamma = 0.97

        self._Mr = 0.3 * 2
        self._Mp = 2.55
        self._Ip = 2.6e-2
        self._Ir = 4.54e-4 * 2
        self._l = 13.8e-2
        self._r = 5.5e-2
        self._dt = 1e-2
        self._g = 9.81
        self._max_u = 5

        self._random = random_start

        high = np.array([np.pi / 2, 15, 75])

        # MDP properties
        observation_space = spaces.Box(low=-high, high=high)
        action_space = spaces.Box(low=np.array([-self._max_u]),
                                  high=np.array([self._max_u]))
        horizon = 300
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Visualization
        self._viewer = Viewer(5 * self._l, 5 * self._l)
        self._last_x = 0

        super().__init__(mdp_info)
Example #15
    def __init__(self,
                 config_file,
                 horizon=None,
                 gamma=0.99,
                 is_discrete=False,
                 width=None,
                 height=None,
                 debug_gui=False,
                 verbose=False):
        """
        Constructor.

        Args:
             config_file (str): path to the YAML file specifying the task
                (see igibson/examples/configs/ and igibson/test/);
             horizon (int, None): the horizon;
             gamma (float, 0.99): the discount factor;
             is_discrete (bool, False): if True, actions are automatically
                discretized by iGibson's `set_up_discrete_action_space`.
                Please note that not all robots support discrete actions.
             width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used;
             height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used;
             debug_gui (bool, False): if True, activates iGibson in GUI mode,
                showing the pybullet rendering and the robot camera;
             verbose (bool, False): if False, it disables iGibson's default messages.

        """

        if not verbose:
            # Disable iGibson log messages
            logging.disable(logging.CRITICAL + 1)

        # MDP creation
        self._not_pybullet = False
        self._first = True

        config = parse_config(config_file)
        config['is_discrete'] = is_discrete

        if horizon is not None:
            config['max_step'] = horizon
        else:
            horizon = config['max_step']
            config['max_step'] = horizon + 1  # Hack to ignore gym time limit

        if width is not None:
            config['image_width'] = width
        if height is not None:
            config['image_height'] = height

        env = iGibsonEnv(config_file=config,
                         mode='gui' if debug_gui else 'headless')
        env = iGibsonWrapper(env)

        self.env = env

        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(3, self._img_size[1],
                                       self._img_size[0]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 60)
        self._image = None

        Environment.__init__(self, mdp_info)
Example #16
    def __init__(self,
                 file_name,
                 actuation_spec,
                 observation_spec,
                 gamma,
                 horizon,
                 n_substeps=1,
                 n_intermediate_steps=1,
                 additional_data_spec=None,
                 collision_groups=None):
        """
        Constructor.

        Args:
            file_name (string): The path to the XML file with which the
                environment should be created;
            actuation_spec (list): A list specifying the names of the joints
                which should be controllable by the agent. Can be left empty
                when all actuators should be used;
            observation_spec (list): A list containing the names of data that
                should be made available to the agent as an observation and
                their type (ObservationType). An entry in the list is given by:
                (name, type);
            gamma (float): The discounting factor of the environment;
            horizon (int): The maximum horizon for the environment;
            n_substeps (int): The number of substeps to use by the MuJoCo
                simulator. An action given by the agent will be applied for
                n_substeps before the agent receives the next observation and
                can act accordingly;
            n_intermediate_steps (int): The number of steps between every action
                taken by the agent. Similar to n_substeps, but allows the user
                to modify, control and access intermediate states;
            additional_data_spec (list): A list containing the data fields of
                interest, which should be read from or written to during
                simulation. The entries are given as the following tuples:
                (key, name, type), where key is a string for later referencing
                in the "read_data" and "write_data" methods, name is the name
                of the object in the XML specification and type is the
                ObservationType;
            collision_groups (list): A list containing groups of geoms for
                which collisions should be checked during simulation via
                ``check_collision``. The entries are given as:
                ``(key, geom_names)``, where key is a string for later
                referencing in the "check_collision" method, and geom_names is
                a list of geom names in the XML specification.
        """
        # Create the simulation
        self._sim = self._load_simulation(file_name, n_substeps)

        self._n_intermediate_steps = n_intermediate_steps
        self._viewer = None
        self._state = None

        # Read the actuation spec and build the mapping between actions and ids
        # as well as their limits
        if len(actuation_spec) == 0:
            self._action_indices = list(
                range(len(self._sim.model._actuator_name2id)))
        else:
            self._action_indices = []
            for name in actuation_spec:
                self._action_indices.append(
                    self._sim.model._actuator_name2id[name])

        low = []
        high = []
        for index in self._action_indices:
            if self._sim.model.actuator_ctrllimited[index]:
                low.append(self._sim.model.actuator_ctrlrange[index][0])
                high.append(self._sim.model.actuator_ctrlrange[index][1])
            else:
                low.append(-np.inf)
                high.append(np.inf)
        action_space = Box(np.array(low), np.array(high))

        # Read the observation spec to build a mapping at every step. It is
        # ensured that the values appear in the order they are specified.
        if len(observation_spec) == 0:
            raise AttributeError(
                "No Environment observations were specified. "
                "Add at least one observation to the observation_spec.")
        else:
            self._observation_map = observation_spec

        # We can only specify limits for the joint positions, all other
        # information can be potentially unbounded
        low = []
        high = []
        for name, ot in self._observation_map:
            obs_count = len(self._get_observation(name, ot))
            if obs_count == 1 and ot == ObservationType.JOINT_POS:
                joint_range = self._sim.model.jnt_range[
                    self._sim.model._joint_name2id[name]]
                if not (joint_range[0] == joint_range[1] == 0.0):
                    low.append(joint_range[0])
                    high.append(joint_range[1])
                else:
                    low.append(-np.inf)
                    high.append(np.inf)
            else:
                low.extend([-np.inf] * obs_count)
                high.extend([np.inf] * obs_count)
        observation_space = Box(np.array(low), np.array(high))

        # Pre-process the additional data to allow easier writing and reading
        # to and from arrays in MuJoCo
        self.additional_data = {}
        if additional_data_spec is not None:
            for key, name, ot in additional_data_spec:
                self.additional_data[key] = (name, ot)

        # Pre-process the collision groups for "fast" detection of contacts
        self.collision_groups = {}
        if collision_groups is not None:
            for name, geom_names in collision_groups:
                self.collision_groups[name] = {
                    self._sim.model._geom_name2id[geom_name]
                    for geom_name in geom_names
                }

        # Finally, we create the MDP information and call the constructor of
        # the parent class
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)
        super().__init__(mdp_info)
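
Since this constructor delegates model loading to a _load_simulation hook, a concrete subclass supplies the XML file and the specs. A hypothetical sketch, assuming the base class is MushroomRL's MuJoCo environment ('arm.xml' and the joint/actuator names are placeholders):

from mushroom_rl.environments.mujoco import MuJoCo, ObservationType

class ArmEnv(MuJoCo):
    def __init__(self):
        actuation_spec = ['shoulder_motor', 'elbow_motor']
        observation_spec = [('shoulder_joint', ObservationType.JOINT_POS),
                            ('shoulder_joint', ObservationType.JOINT_VEL),
                            ('elbow_joint', ObservationType.JOINT_POS),
                            ('elbow_joint', ObservationType.JOINT_VEL)]
        super().__init__('arm.xml', actuation_spec, observation_spec,
                         gamma=.99, horizon=500, n_substeps=2)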
Example #17
    def __init__(self,
                 domain_name,
                 task_name,
                 horizon=None,
                 gamma=0.99,
                 task_kwargs=None,
                 dt=.01,
                 width_screen=480,
                 height_screen=480,
                 camera_id=0,
                 use_pixels=False,
                 pixels_width=64,
                 pixels_height=64):
        """
        Constructor.

        Args:
             domain_name (str): name of the environment;
             task_name (str): name of the task of the environment;
             horizon (int, None): the horizon;
             gamma (float, 0.99): the discount factor;
             task_kwargs (dict, None): parameters of the task;
             dt (float, .01): duration of a control step;
             width_screen (int, 480): width of the screen;
             height_screen (int, 480): height of the screen;
             camera_id (int, 0): id of the camera used to render the environment;
             use_pixels (bool, False): if True, pixel observations are used
                rather than the state vector;
             pixels_width (int, 64): width of the pixel observation;
             pixels_height (int, 64): height of the pixel observation.

        """
        # MDP creation
        self.env = suite.load(domain_name, task_name, task_kwargs=task_kwargs)
        if use_pixels:
            self.env = pixels.Wrapper(self.env,
                                      render_kwargs={
                                          'width': pixels_width,
                                          'height': pixels_height
                                      })

        # get the default horizon
        if horizon is None:
            horizon = self.env._step_limit

        # Hack to ignore dm_control time limit.
        self.env._step_limit = np.inf

        if use_pixels:
            self._convert_observation_space = self._convert_observation_space_pixels
            self._convert_observation = self._convert_observation_pixels
        else:
            self._convert_observation_space = self._convert_observation_space_vector
            self._convert_observation = self._convert_observation_vector

        # MDP properties
        action_space = self._convert_action_space(self.env.action_spec())
        observation_space = self._convert_observation_space(
            self.env.observation_spec())
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        self._viewer = ImageViewer((width_screen, height_screen), dt)
        self._camera_id = camera_id

        super().__init__(mdp_info)

        self._state = None
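
A usage sketch, assuming this is MushroomRL's DMControl wrapper; domain and task names follow dm_control.suite:

from mushroom_rl.environments import DMControl

env = DMControl('cartpole', 'swingup', gamma=0.99, use_pixels=False)
state = env.reset()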
Example #18
    def __init__(self,
                 wrapper,
                 config_file,
                 base_config_file=None,
                 horizon=None,
                 gamma=0.99,
                 width=None,
                 height=None):
        """
        Constructor. For more details on how to pass YAML configuration files,
        please see <MUSHROOM_RL PATH>/examples/habitat/README.md

        Args:
             wrapper (str): wrapper for converting observations and actions
                (e.g., HabitatRearrangeWrapper);
             config_file (str): path to the YAML file specifying the RL task
                configuration (see <HABITAT_LAB PATH>/habitat_baselines/configs/);
             base_config_file (str, None): path to an optional YAML file, used
                as 'BASE_TASK_CONFIG_PATH' in the first YAML
                (see <HABITAT_LAB PATH>/configs/);
             horizon (int, None): the horizon;
             gamma (float, 0.99): the discount factor;
             width (int, None): width of the pixel observation. If None, the
                value specified in the config file is used.
             height (int, None): height of the pixel observation. If None, the
                value specified in the config file is used.

        """
        # MDP creation
        self._not_pybullet = False
        self._first = True

        if base_config_file is None:
            base_config_file = config_file

        config = get_config(config_paths=config_file,
                            opts=['BASE_TASK_CONFIG_PATH', base_config_file])

        config.defrost()

        if horizon is None:
            horizon = config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS  # Get the default horizon
        config.TASK_CONFIG.ENVIRONMENT.MAX_EPISODE_STEPS = horizon + 1  # Hack to ignore gym time limit

        # Overwrite all RGB width / height used for the TASK (not SIMULATOR)
        for k in config['TASK_CONFIG']['SIMULATOR']:
            if 'rgb' in k.lower():
                if height is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['HEIGHT'] = height
                if width is not None:
                    config['TASK_CONFIG']['SIMULATOR'][k]['WIDTH'] = width

        config.freeze()

        env_class = get_env_class(config.ENV_NAME)
        env = make_env_fn(env_class=env_class, config=config)
        env = globals()[wrapper](env)
        self.env = env

        self._img_size = env.observation_space.shape[0:2]

        # MDP properties
        action_space = self.env.action_space
        observation_space = Box(low=0.,
                                high=255.,
                                shape=(3, self._img_size[1],
                                       self._img_size[0]))
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        self._viewer = ImageViewer((self._img_size[1], self._img_size[0]),
                                   1 / 10)

        Environment.__init__(self, mdp_info)
Example #19
    def __init__(self,
                 name,
                 horizon=None,
                 gamma=0.99,
                 wrappers=None,
                 wrappers_args=None,
                 **env_args):
        """
        Constructor.

        Args:
             name (str): gym id of the environment;
             horizon (int, None): the horizon. If None, the one from Gym is used;
             gamma (float, 0.99): the discount factor;
             wrappers (list, None): list of wrappers to apply over the
                environment. It is possible to pass arguments to the wrappers
                by providing a tuple with two elements: the gym wrapper class
                and a dictionary containing the parameters needed by the
                wrapper constructor;
             wrappers_args (list, None): list of lists of arguments for each
                wrapper;
             **env_args: other gym environment parameters.

        """

        # MDP creation
        self._not_pybullet = True
        self._first = True
        if pybullet_found and '- ' + name in pybullet_envs.getList():
            import pybullet
            pybullet.connect(pybullet.DIRECT)
            self._not_pybullet = False

        self.env = gym.make(name, **env_args)

        self._render_dt = getattr(self.env.unwrapped, 'dt', 0.0)

        if wrappers is not None:
            if wrappers_args is None:
                wrappers_args = [dict()] * len(wrappers)
            for wrapper, args in zip(wrappers, wrappers_args):
                if isinstance(wrapper, tuple):
                    self.env = wrapper[0](self.env, *args, **wrapper[1])
                else:
                    self.env = wrapper(self.env, *args)

        horizon = self._set_horizon(self.env, horizon)

        # MDP properties
        assert not isinstance(self.env.observation_space,
                              gym_spaces.MultiDiscrete)
        assert not isinstance(self.env.action_space, gym_spaces.MultiDiscrete)

        action_space = self._convert_gym_space(self.env.action_space)
        observation_space = self._convert_gym_space(self.env.observation_space)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        if isinstance(action_space, Discrete):
            self._convert_action = lambda a: a[0]
        else:
            self._convert_action = lambda a: a

        super().__init__(mdp_info)
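
A usage sketch, assuming this is MushroomRL's Gym wrapper; the tuple form passes keyword arguments to a wrapper constructor:

import gym
from mushroom_rl.environments import Gym

# Plain environment with an explicit horizon.
env = Gym('CartPole-v1', horizon=200, gamma=.99)

# Applying a Gym wrapper with keyword arguments via the tuple form.
env = Gym('CartPole-v1', horizon=200, gamma=.99,
          wrappers=[(gym.wrappers.TimeLimit,
                     dict(max_episode_steps=200))])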
Example #20
    def __init__(self,
                 files,
                 actuation_spec,
                 observation_spec,
                 gamma,
                 horizon,
                 timestep=1 / 240,
                 n_intermediate_steps=1,
                 enforce_joint_velocity_limits=False,
                 debug_gui=False,
                 **viewer_params):
        """
        Constructor.

        Args:
            files (dict): dictionary of the URDF/MJCF/SDF files to load (key) and parameters dictionary (value);
            actuation_spec (list): A list of tuples specifying the names of the
                joints which should be controllable by the agent and their
                control mode (the constructor requires at least one entry);
            observation_spec (list): A list containing the names of data that
                should be made available to the agent as an observation and
                their type (ObservationType). An entry in the list is given by:
                (name, type);
            gamma (float): The discounting factor of the environment;
            horizon (int): The maximum horizon for the environment;
            timestep (float, 1/240): The timestep used by the PyBullet
                simulator;
            n_intermediate_steps (int): The number of steps between every action
                taken by the agent. Allows the user to modify, control and
                access intermediate states;
            enforce_joint_velocity_limits (bool, False): flag to enforce the velocity limits;
            debug_gui (bool, False): flag to activate the default pybullet
                visualizer, which can be used for debugging purposes;
            **viewer_params: other parameters to be passed to the viewer.
                See PyBulletViewer documentation for the available options.

        """
        assert len(observation_spec) > 0
        assert len(actuation_spec) > 0

        # Store simulation parameters
        self._timestep = timestep
        self._n_intermediate_steps = n_intermediate_steps

        # Create the simulation and viewer
        if debug_gui:
            self._client = BulletClient(connection_mode=pybullet.GUI)
            self._client.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0)
        else:
            self._client = BulletClient(connection_mode=pybullet.DIRECT)
        self._client.setTimeStep(self._timestep)
        self._client.setGravity(0, 0, -9.81)
        self._client.setAdditionalSearchPath(pybullet_data.getDataPath())

        self._viewer = PyBulletViewer(self._client, self.dt, **viewer_params)
        self._state = None

        # Load model and create access maps
        self._model_map = dict()
        self._load_all_models(files, enforce_joint_velocity_limits)

        # Build utils
        self._indexer = IndexMap(self._client, self._model_map, actuation_spec,
                                 observation_spec)
        self.joints = JointsHelper(self._client, self._indexer,
                                   observation_spec)

        # Finally, we create the MDP information and call the constructor of the parent class
        action_space = Box(*self._indexer.action_limits)

        observation_space = Box(*self._indexer.observation_limits)
        mdp_info = MDPInfo(observation_space, action_space, gamma, horizon)

        # Let the child class modify the mdp_info data structure
        mdp_info = self._modify_mdp_info(mdp_info)

        # Provide the structure to the superclass
        super().__init__(mdp_info)

        # Save initial state of the MDP
        self._initial_state = self._client.saveState()