def register(id, **kwargs):
    """Idempotent version of gym.envs.registration.registry.

    Needed since aprl.envs can get imported multiple times, e.g. when deserializing policies.
    """
    try:
        existing_spec = registration.spec(id)
        new_spec = registration.EnvSpec(id, **kwargs)
        assert existing_spec.__dict__ == new_spec.__dict__
    except gym.error.UnregisteredEnv:  # not previously registered
        registration.register(id, **kwargs)
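A minimal usage sketch of the idempotent register above, assuming gym and gym.envs.registration are imported as in the snippet; the env id and entry point below are hypothetical:

# Hypothetical id and entry_point: a second call with identical kwargs is a no-op,
# while a second call with different kwargs would trip the assertion above.
register("MultiAgentPoint-v0",
         entry_point="aprl.envs.point:PointEnv",
         max_episode_steps=500)
register("MultiAgentPoint-v0",
         entry_point="aprl.envs.point:PointEnv",
         max_episode_steps=500)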
Example #2
def get_py_env(env_name,
               max_episode_steps=None,
               constant_task=None,
               use_neg_rew=None,
               margin=None):
  """Load an environment.

  Args:
    env_name: (str) name of the environment.
    max_episode_steps: (int) maximum number of steps per episode. Set to None
      to not include a limit.
    constant_task: specifies a fixed task to use for all episodes. Set to None
      to use tasks sampled from the task distribution.
    use_neg_rew: (bool) For the goal-reaching tasks, indicates whether to use
      a (-1, 0) sparse reward (use_neg_rew=True) or a (0, 1) sparse reward.
    margin: (float) For goal-reaching tasks, indicates the desired distance
      to the goal.
  Returns:
    env: the environment, built from a dynamics model and a task distribution.
    task_distribution: the task distribution used for the environment.
  """
  if "sawyer" in env_name:
    print(("ERROR: Modify utils.py to import sawyer_env and not dm_env. "
           "Currently the sawyer_env import is commented out to prevent "
           "a segfault from occuring when trying to import both sawyer_env "
           "and dm_env"))
    assert False

  if env_name.split("_")[0] == "point":
    _, walls, resize_factor = env_name.split("_")
    dynamics = point_env.PointDynamics(
        walls=walls, resize_factor=int(resize_factor))
    task_distribution = point_env.PointGoalDistribution(dynamics)
  elif env_name.split("_")[0] == "pointTask":
    _, walls, resize_factor = env_name.split("_")
    dynamics = point_env.PointDynamics(
        walls=walls, resize_factor=int(resize_factor))
    task_distribution = point_env.PointTaskDistribution(dynamics)

  elif env_name == "quadruped-run":
    dynamics = dm_env.QuadrupedRunDynamics()
    task_distribution = dm_env.QuadrupedRunTaskDistribution(dynamics)
  elif env_name == "quadruped":
    dynamics = dm_env.QuadrupedRunDynamics()
    task_distribution = dm_env.QuadrupedContinuousTaskDistribution(dynamics)
  elif env_name == "hopper":
    dynamics = dm_env.HopperDynamics()
    task_distribution = dm_env.HopperTaskDistribution(dynamics)
  elif env_name == "hopper-discrete":
    dynamics = dm_env.HopperDynamics()
    task_distribution = dm_env.HopperDiscreteTaskDistribution(dynamics)
  elif env_name == "walker":
    dynamics = dm_env.WalkerDynamics()
    task_distribution = dm_env.WalkerTaskDistribution(dynamics)
  elif env_name == "humanoid":
    dynamics = dm_env.HumanoidDynamics()
    task_distribution = dm_env.HumanoidTaskDistribution(dynamics)

  ### sparse tasks
  elif env_name == "finger":
    dynamics = dm_env.FingerDynamics()
    task_distribution = dm_env.FingerTaskDistribution(
        dynamics, use_neg_rew=use_neg_rew, margin=margin)
  elif env_name == "manipulator":
    dynamics = dm_env.ManipulatorDynamics()
    task_distribution = dm_env.ManipulatorTaskDistribution(dynamics)
  elif env_name == "point-mass":
    dynamics = dm_env.PointMassDynamics()
    task_distribution = dm_env.PointMassTaskDistribution(
        dynamics, use_neg_rew=use_neg_rew, margin=margin)
  elif env_name == "stacker":
    dynamics = dm_env.StackerDynamics()
    task_distribution = dm_env.FingerStackerDistribution(
        dynamics, use_neg_rew=use_neg_rew, margin=margin)
  elif env_name == "swimmer":
    dynamics = dm_env.SwimmerDynamics()
    task_distribution = dm_env.SwimmerTaskDistribution(dynamics)
  elif env_name == "fish":
    dynamics = dm_env.FishDynamics()
    task_distribution = dm_env.FishTaskDistribution(dynamics)

  elif env_name == "sawyer-reach":
    dynamics = sawyer_env.SawyerDynamics()
    task_distribution = sawyer_env.SawyerReachTaskDistribution(dynamics)
  else:
    raise NotImplementedError("Unknown environment: %s" % env_name)
  gym_env = multitask.Environment(
      dynamics, task_distribution, constant_task=constant_task)
  if max_episode_steps is not None:
    # Add a placeholder spec so the TimeLimit wrapper works.
    gym_env.spec = registration.EnvSpec("env-v0")
    gym_env = TimeLimit(gym_env, max_episode_steps)
  wrapped_env = gym_wrapper.GymWrapper(gym_env, discount=1.0, auto_reset=True)
  return wrapped_env, task_distribution
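A rough usage sketch for get_py_env, assuming the "point_<walls>_<resize_factor>" name format parsed above and that gym_wrapper is TF-Agents' GymWrapper (matching the discount/auto_reset arguments); the wall layout "Maze" is a guess:

import numpy as np

# "Maze" is a hypothetical wall layout; resize_factor=1 keeps the original grid size.
env, task_distribution = get_py_env("point_Maze_1", max_episode_steps=100)
time_step = env.reset()
spec = env.action_spec()
while not time_step.is_last():
    # Sample a random action that respects the bounded action spec.
    action = np.random.uniform(spec.minimum, spec.maximum,
                               size=spec.shape).astype(spec.dtype)
    time_step = env.step(action)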
Example #3
    def __init__(self,
                 time_limit=100,
                 multi_goal=False,
                 goal_tolerance=None,
                 zero_speed_init=True,
                 tuple_obspace=False,
                 dense_rewards=False,
                 env_config='blocks_simple.xml',
                 goal=[0.0, 0.0],
                 force_duration=10,
                 frame_skip=20,
                 Fmax=450.,
                 orient_euler=False,
                 normalize_obs=True):
        """
        Simplified version of the blocks environment, supposed to run faster than the original one,
        under the assumption of using low-dimensional features only and applying Fxy actions at the COM of a single block in the scene.
        Observations = [x,y,z, qw, q0, q1, q2], where q == quaternion
        Actions = [Fx,Fy]
        :param time_limit:
        :param multi_goal:
        :param goal_tolerance: (int or array-like) if None, the tolerance is taken from the xml file
        :param zero_speed_init:
        :param tuple_obspace:
        :param dense_rewards:
        :param env_config:
        :param goal:
        :param force_duration:
        :param Fmax:
        """

        self.render_current_step = False

        # Viewer parameters
        self.scene_width = 126
        self.scene_height = 126

        self.Fmax = Fmax

        self._goal = None
        self._goal_static = None

        self.goal_static = goal

        self.time_step = 0
        self.max_episode_steps = time_limit
        self.pos_prev = np.array([0, 0, 0])
        self.max_dist2goal = 0.4
        self.use_static_goal = not multi_goal
        self.goal_feature_size = 2  # this must be set before goal_tolerance is assigned
        self.goal_tolerance = goal_tolerance

        self.orient_euler = orient_euler
        if not orient_euler:
            self.orient_default = np.array([1., 0., 0., 0.])
            self.angle_coord_size = 4  # 4 == quaternions are used, 3 == euler is used

            self.get_obs_indx_names()
        else:
            raise NotImplementedError("Euler orientation is not supported")

        self.pose_size = 3 + self.angle_coord_size  #7d
        self.tuple_obspace = tuple_obspace
        self.force_duration = force_duration

        ## Velocity tolerances used by the logic
        # that waits until the cube stops moving
        self.Vxy_atol = 1e-2
        self.Vxy_rtol = 1e-2
        self.Vang_atol = 1e-2
        self.Vang_rtol = 1e-2

        self.Vxy_atol = np.array([self.Vxy_atol] * 2)
        self.Vxy_rtol = np.array([self.Vxy_rtol] * 2)
        self.Vang_atol = np.array([self.Vang_atol] * self.angle_coord_size)
        self.Vang_rtol = np.array([self.Vang_rtol] * self.angle_coord_size)

        self.Vxy_null = np.zeros_like(self.Vxy_atol)
        self.Vang_null = np.zeros_like(self.Vang_atol)

        # These are introduced for compatibility
        self.use_distance2center_stop_criteria = True
        self.use_mnist_stop_criteria = False
        self.use_mnist_reward = False
        self.max_action = np.array([1., 1.])
        self.zero_speed_init = zero_speed_init
        self.dense_rewards = dense_rewards

        # Reading table parameters directly from the xml
        env_config = os.path.join(
            os.path.split(os.path.abspath(__file__))[0], env_config)
        params_xml = self.read_xml_parameters(xml_filename=env_config)

        self.space_size = params_xml['space_size']
        self.table_wall_width = params_xml['table_wall_width']
        self.goal_radius = params_xml['goal_radius']
        # self.space_size = np.array([1.0,1.0,1.0])
        self.camera_distance = 2.5 * params_xml['space_size'][0]

        self.normalize_obs = False  # Initially set to False to avoid normalization during initialization
        print('Pickle init ...')
        utils.EzPickle.__init__(self)

        print('Mujoco model init ...')
        mujoco_env.MujocoEnv.__init__(self,
                                      model_path=env_config,
                                      frame_skip=frame_skip)

        self.normalize_obs = normalize_obs
        # self.viewer = self._get_viewer()
        self.init_qpos = self.model.data.qpos.ravel().copy()
        self.init_qvel = np.zeros_like(self.model.data.qvel.ravel())
        self.init_qfrc = self.model.data.qfrc_applied.ravel().copy()

        action_low = np.array([-1., -1.])
        action_high = np.array([1., 1.])
        self.action_space = spaces.Box(action_low, action_high)

        if goal_tolerance is None:
            self.goal_tolerance = self.goal_radius

        self.get_obs_space()

        print('Pose: ', self.model.data.qpos.flatten())
        print('Vel: ', self.model.data.qvel.flatten())
        print('Com block: ', self.get_body_com("block"))
        print('Com goal: ', self.get_body_com("goal"))

        # self.goal_static = self.pose2goal(goal) #Must be filled in first since it will contain the full state
        # print('goal static: ', self.goal_static)
        self.goal = copy.deepcopy(self.goal_static)

        if self.tuple_obspace:
            self.observation_space = gym_tuple((self.observation_space, ))

        if self.spec is None:
            self.spec = gym_reg.EnvSpec(
                id='BlocksSimple-v0', max_episode_steps=self.max_episode_steps)

        self.max_action = np.linalg.norm(np.abs(self.action_space.high))

        print('Max action: ', self.max_action)
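A rough construction sketch for the __init__ above; the class name BlocksSimpleEnv is an assumption (only the constructor is shown), and actions are the 2-D forces [Fx, Fy] in [-1, 1]:

# BlocksSimpleEnv is a hypothetical name for the class this __init__ belongs to.
env = BlocksSimpleEnv(time_limit=100, dense_rewards=True, goal=[0.2, 0.1])
obs = env.reset()
for _ in range(10):
    action = env.action_space.sample()  # [Fx, Fy] in [-1, 1]
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()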
Example #4
    def __init__(self,
                 raw_control=True,
                 raw_control_zero_middle=True,
                 dim_mode='3D',
                 tf_control=False,
                 sim_steps=4,
                 obs_repr="state_xyz_vxyz_rot_omega",
                 ep_time=3,
                 thrust_noise_ratio=0.,
                 obstacles_num=0,
                 room_size=10,
                 init_random_state=False,
                 rew_coeff=None):
        """
        @param obs_repr: options: state_xyz_vxyz_rot_omega, state_xyz_vxyz_quat_omega
        """
        np.seterr(under='ignore')
        self.init_random_state = init_random_state
        self.room_size = room_size
        self.max_init_vel = 1.
        self.max_init_omega = 2 * np.pi
        self.room_box = np.array(
            [[-self.room_size, -self.room_size, 0],
             [self.room_size, self.room_size, self.room_size]])
        self.obs_repr = obs_repr
        self.state_vector = getattr(self, obs_repr)

        self.dynamics = default_dynamics(sim_steps,
                                         room_box=self.room_box,
                                         dim_mode=dim_mode,
                                         noise_scale=thrust_noise_ratio)
        # self.controller = ShiftedMotorControl(self.dynamics)
        # self.controller = OmegaThrustControl(self.dynamics) ## The last one used
        # self.controller = VelocityYawControl(self.dynamics)
        self.scene = None
        if obstacles_num > 0:
            self.obstacles = _random_obstacles(None, obstacles_num,
                                               self.room_size,
                                               self.dynamics.arm)
        else:
            self.obstacles = None

        # self.oracle = NonlinearPositionController(self.dynamics)
        self.dim_mode = dim_mode
        if self.dim_mode == '1D':
            self.viewpoint = 'side'
        else:
            self.viewpoint = 'chase'

        if raw_control:
            if self.dim_mode == '1D':  # Z axis only
                self.controller = VerticalControl(
                    self.dynamics, zero_action_middle=raw_control_zero_middle)
            elif self.dim_mode == '2D':  # X and Z axes only
                self.controller = VertPlaneControl(
                    self.dynamics, zero_action_middle=raw_control_zero_middle)
            elif self.dim_mode == '3D':
                self.controller = RawControl(
                    self.dynamics, zero_action_middle=raw_control_zero_middle)
            else:
                raise ValueError('QuadEnv: Unknown dimensionality mode %s' %
                                 self.dim_mode)
        else:
            self.controller = NonlinearPositionController(
                self.dynamics, tf_control=tf_control)

        self.action_space = self.controller.action_space(self.dynamics)

        ## Former way to get obs space
        # # pos, vel, rot, rot vel
        # obs_dim = 3 + 3 + 9 + 3 + 3 # xyz, Vxyz, R, Omega, goal_xyz
        # # TODO tighter bounds on some variables
        # obs_high = 100 * np.ones(obs_dim)
        # # rotation mtx guaranteed to be orthogonal
        # obs_high[6:6+9] = 1
        # self.observation_space = spaces.Box(-obs_high, obs_high)

        self.observation_space = self.get_observation_space()

        # TODO get this from a wrapper
        self.ep_time = ep_time  #In seconds
        self.dt = 1.0 / 100.0
        self.sim_steps = sim_steps
        self.ep_len = int(self.ep_time / (self.dt * self.sim_steps))
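        # e.g. with the defaults above (ep_time=3 s, dt=0.01 s, sim_steps=4):
        # ep_len = int(3 / (0.01 * 4)) = 75 simulation steps per episode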
        self.tick = 0
        self.crashed = False

        self._seed()

        # size of the box from which initial position will be randomly sampled
        # if box_scale > 1.0 then it will also grow every episode
        self.box = 2.0
        self.box_scale = 1.0  # scale the initial box by this factor each episode

        #########################################
        ## REWARDS
        self.rew_coeff = {
            "pos": 1,
            "effort": 0.01,
            "crash": 1,
            "orient": 1,
            "vel_proj": 0,
            "spin_z": 1,
            "spin_xy": 0.5
        }
        if rew_coeff is not None:
            assert isinstance(rew_coeff, dict)
            assert set(rew_coeff.keys()).issubset(set(self.rew_coeff.keys()))
            self.rew_coeff.update(rew_coeff)
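        # For example, passing rew_coeff={"crash": 10.0} keeps every other default
        # and only raises the crash penalty weight; a key not present in the
        # defaults would fail the subset assertion above.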

        self._reset()

        if self.spec is None:
            self.spec = gym_reg.EnvSpec(id='Quadrotor-v0',
                                        max_episode_steps=self.ep_len)

        # Always call Serializable constructor last
        Serializable.quick_init(self, locals())
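A brief construction sketch for the quadrotor environment above; the class name QuadrotorEnv is an assumption (only the constructor is shown):

# QuadrotorEnv is a hypothetical name for the class this __init__ belongs to.
env = QuadrotorEnv(raw_control=True, dim_mode='3D', sim_steps=4, ep_time=3,
                   rew_coeff={"crash": 10.0})  # partial override of the reward weights
obs = env.reset()
for _ in range(env.ep_len):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        break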
Example #5
    def __init__(self, raw_control=True, raw_control_zero_middle=True, dim_mode='3D', tf_control=False, sim_steps=4,
                obs_repr="state_xyz_vxyz_rot_omega", ep_time=3):
        """
        @param obs_repr: options: state_xyz_vxyz_rot_omega, state_xyz_vxyz_quat_omega
        """
        np.seterr(under='ignore')
        self.room_box = np.array([[-10, -10, 0], [10, 10, 10]])
        self.obs_repr = obs_repr
        self.state_vector = getattr(self, obs_repr)

        self.dynamics = default_dynamics(sim_steps, room_box=self.room_box, dim_mode=dim_mode)
        # self.controller = ShiftedMotorControl(self.dynamics)
        # self.controller = OmegaThrustControl(self.dynamics) ## The last one used
        # self.controller = VelocityYawControl(self.dynamics)
        self.scene = None
        # self.oracle = NonlinearPositionController(self.dynamics)
        self.dim_mode = dim_mode
        if self.dim_mode == '1D':
            self.viewpoint = 'side'
        else:
            self.viewpoint = 'chase'

        if raw_control:
            if self.dim_mode == '1D':
                self.controller = VerticalControl(self.dynamics, zero_action_middle=raw_control_zero_middle, dim_mode="1D")
            elif self.dim_mode == '2D':
                self.controller = VertPlaneControl(self.dynamics, zero_action_middle=raw_control_zero_middle, dim_mode="2D")
            elif self.dim_mode == '3D':
                self.controller = RawControl(self.dynamics, zero_action_middle=raw_control_zero_middle)
            else:
                raise ValueError('QuadEnv: Unknown dimensionality mode %s' % self.dim_mode)
        else:
            self.controller = NonlinearPositionController(self.dynamics, tf_control=tf_control)

        self.action_space = self.controller.action_space(self.dynamics)

        ## Former way to get obs space
        # # pos, vel, rot, rot vel
        # obs_dim = 3 + 3 + 9 + 3 + 3 # xyz, Vxyz, R, Omega, goal_xyz
        # # TODO tighter bounds on some variables
        # obs_high = 100 * np.ones(obs_dim)
        # # rotation mtx guaranteed to be orthogonal
        # obs_high[6:6+9] = 1
        # self.observation_space = spaces.Box(-obs_high, obs_high)

        self.observation_space = self.get_observation_space()


        # TODO get this from a wrapper
        self.ep_time = ep_time #In seconds
        self.dt = 1.0 / 100.0
        self.sim_steps = sim_steps
        self.ep_len = int(self.ep_time / (self.dt * self.sim_steps))
        self.tick = 0
        # self.dt = 1.0 / 50.0
        self.crashed = False

        self._seed()

        # size of the box from which initial position will be randomly sampled
        # if box_scale > 1.0 then it will also grow every episode
        self.box = 2.0
        self.box_scale = 1.0  # scale the initial box by this factor each episode

        self._reset()

        if self.spec is None:
            self.spec = gym_reg.EnvSpec(id='Quadrotor-v0', max_episode_steps=self.ep_len)
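For reference, a standalone sketch of the "former way" observation space described in the commented-out block above; get_observation_space() is what the constructor actually uses, so this is only an illustration of the bounds:

import numpy as np
from gym import spaces

# xyz, Vxyz, R (flattened 3x3 rotation matrix), Omega, goal_xyz
obs_dim = 3 + 3 + 9 + 3 + 3
obs_high = 100 * np.ones(obs_dim)
obs_high[6:6 + 9] = 1  # rotation matrix entries are bounded by 1
observation_space = spaces.Box(-obs_high, obs_high)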