def register(id, **kwargs):
    """Idempotent version of gym.envs.registration.register.

    Needed since aprl.envs can get imported multiple times, e.g. when
    deserializing policies.
    """
    try:
        existing_spec = registration.spec(id)
        new_spec = registration.EnvSpec(id, **kwargs)
        assert existing_spec.__dict__ == new_spec.__dict__
    except gym.error.UnregisteredEnv:
        # Not previously registered, so it is safe to register now.
        registration.register(id, **kwargs)
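# Illustrative usage sketch (the env id and entry point below are hypothetical
# and not defined in this package): a repeated, identical registration is
# silently skipped instead of raising gym's re-registration error.
#
#   register("Example-v0", entry_point="aprl.envs.example:ExampleEnv")
#   register("Example-v0", entry_point="aprl.envs.example:ExampleEnv")  # no-op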
def get_py_env(env_name,
               max_episode_steps=None,
               constant_task=None,
               use_neg_rew=None,
               margin=None):
    """Load an environment.

    Args:
        env_name: (str) name of the environment.
        max_episode_steps: (int) maximum number of steps per episode. Set to
            None to not impose a limit.
        constant_task: specifies a fixed task to use for all episodes. Set to
            None to use tasks sampled from the task distribution.
        use_neg_rew: (bool) For the goal-reaching tasks, indicates whether to
            use a (-1, 0) sparse reward (use_neg_rew = True) or a (0, 1)
            sparse reward.
        margin: (float) For goal-reaching tasks, indicates the desired
            distance to the goal.

    Returns:
        env: the environment, built from a dynamics model and a task
            distribution.
        task_distribution: the task distribution used for the environment.
    """
    if "sawyer" in env_name:
        print(("ERROR: Modify utils.py to import sawyer_env and not dm_env. "
               "Currently the sawyer_env import is commented out to prevent "
               "a segfault from occurring when trying to import both "
               "sawyer_env and dm_env"))
        assert False
    if env_name.split("_")[0] == "point":
        _, walls, resize_factor = env_name.split("_")
        dynamics = point_env.PointDynamics(
            walls=walls, resize_factor=int(resize_factor))
        task_distribution = point_env.PointGoalDistribution(dynamics)
    elif env_name.split("_")[0] == "pointTask":
        _, walls, resize_factor = env_name.split("_")
        dynamics = point_env.PointDynamics(
            walls=walls, resize_factor=int(resize_factor))
        task_distribution = point_env.PointTaskDistribution(dynamics)
    elif env_name == "quadruped-run":
        dynamics = dm_env.QuadrupedRunDynamics()
        task_distribution = dm_env.QuadrupedRunTaskDistribution(dynamics)
    elif env_name == "quadruped":
        dynamics = dm_env.QuadrupedRunDynamics()
        task_distribution = dm_env.QuadrupedContinuousTaskDistribution(dynamics)
    elif env_name == "hopper":
        dynamics = dm_env.HopperDynamics()
        task_distribution = dm_env.HopperTaskDistribution(dynamics)
    elif env_name == "hopper-discrete":
        dynamics = dm_env.HopperDynamics()
        task_distribution = dm_env.HopperDiscreteTaskDistribution(dynamics)
    elif env_name == "walker":
        dynamics = dm_env.WalkerDynamics()
        task_distribution = dm_env.WalkerTaskDistribution(dynamics)
    elif env_name == "humanoid":
        dynamics = dm_env.HumanoidDynamics()
        task_distribution = dm_env.HumanoidTaskDistribution(dynamics)
    ### Sparse tasks
    elif env_name == "finger":
        dynamics = dm_env.FingerDynamics()
        task_distribution = dm_env.FingerTaskDistribution(
            dynamics, use_neg_rew=use_neg_rew, margin=margin)
    elif env_name == "manipulator":
        dynamics = dm_env.ManipulatorDynamics()
        task_distribution = dm_env.ManipulatorTaskDistribution(dynamics)
    elif env_name == "point-mass":
        dynamics = dm_env.PointMassDynamics()
        task_distribution = dm_env.PointMassTaskDistribution(
            dynamics, use_neg_rew=use_neg_rew, margin=margin)
    elif env_name == "stacker":
        dynamics = dm_env.StackerDynamics()
        task_distribution = dm_env.FingerStackerDistribution(
            dynamics, use_neg_rew=use_neg_rew, margin=margin)
    elif env_name == "swimmer":
        dynamics = dm_env.SwimmerDynamics()
        task_distribution = dm_env.SwimmerTaskDistribution(dynamics)
    elif env_name == "fish":
        dynamics = dm_env.FishDynamics()
        task_distribution = dm_env.FishTaskDistribution(dynamics)
    elif env_name == "sawyer-reach":
        dynamics = sawyer_env.SawyerDynamics()
        task_distribution = sawyer_env.SawyerReachTaskDistribution(dynamics)
    else:
        raise NotImplementedError("Unknown environment: %s" % env_name)

    gym_env = multitask.Environment(
        dynamics, task_distribution, constant_task=constant_task)
    if max_episode_steps is not None:
        # Add a placeholder spec so the TimeLimit wrapper works.
        gym_env.spec = registration.EnvSpec("env-v0")
        gym_env = TimeLimit(gym_env, max_episode_steps)
    wrapped_env = gym_wrapper.GymWrapper(gym_env, discount=1.0, auto_reset=True)
    return wrapped_env, task_distribution
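# Illustrative usage sketch ("point_FourRooms_1" is a hypothetical name that
# merely follows the "point_<walls>_<resize_factor>" convention parsed above;
# the reset() call assumes gym_wrapper is the TF-Agents GymWrapper, which
# exposes a py-environment-style interface):
#
#   env, task_distribution = get_py_env("point_FourRooms_1",
#                                       max_episode_steps=100)
#   first_timestep = env.reset()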
def __init__(self,
             time_limit=100,
             multi_goal=False,
             goal_tolerance=None,
             zero_speed_init=True,
             tuple_obspace=False,
             dense_rewards=False,
             env_config='blocks_simple.xml',
             goal=[0.0, 0.0],
             force_duration=10,
             frame_skip=20,
             Fmax=450.,
             orient_euler=False,
             normalize_obs=True):
    """Simplified version of the blocks environment - supposed to run faster
    than the original one, under the assumption of using low-dim features
    only and applying Fxy actions at the COM of a single block on the scene.

    Observations = [x, y, z, qw, q0, q1, q2], where q == quaternion
    Actions = [Fx, Fy]

    :param time_limit:
    :param multi_goal:
    :param goal_tolerance: (int or array-like) if None then take the tolerance
        from the xml file
    :param zero_speed_init:
    :param tuple_obspace:
    :param dense_rewards:
    :param env_config:
    :param goal:
    :param force_duration:
    :param Fmax:
    """
    self.render_current_step = False

    # Viewer parameters
    self.scene_width = 126
    self.scene_height = 126

    self.Fmax = Fmax
    self._goal = None
    self._goal_static = None
    self.goal_static = goal
    self.time_step = 0
    self.max_episode_steps = time_limit
    self.pos_prev = np.array([0, 0, 0])
    self.max_dist2goal = 0.4
    self.use_static_goal = not multi_goal

    # This property must be set before the goal_tolerance assignment.
    self.goal_feature_size = 2
    self.goal_tolerance = goal_tolerance

    self.orient_euler = orient_euler
    if not orient_euler:
        self.orient_default = np.array([1., 0., 0., 0.])
        # 4 == quaternions are used, 3 == Euler angles are used
        self.angle_coord_size = 4
        self.get_obs_indx_names()
    else:
        raise NotImplementedError

    self.pose_size = 3 + self.angle_coord_size  # 7d
    self.tuple_obspace = tuple_obspace
    self.force_duration = force_duration

    # Tolerances on velocities used by the logic that waits until the cube
    # stops moving (settles).
    self.Vxy_atol = 1e-2
    self.Vxy_rtol = 1e-2
    self.Vang_atol = 1e-2
    self.Vang_rtol = 1e-2
    self.Vxy_atol = np.array([self.Vxy_atol] * 2)
    self.Vxy_rtol = np.array([self.Vxy_rtol] * 2)
    self.Vang_atol = np.array([self.Vang_atol] * self.angle_coord_size)
    self.Vang_rtol = np.array([self.Vang_rtol] * self.angle_coord_size)
    self.Vxy_null = np.zeros_like(self.Vxy_atol)
    self.Vang_null = np.zeros_like(self.Vang_atol)

    # These are introduced for compatibility.
    self.use_distance2center_stop_criteria = True
    self.use_mnist_stop_criteria = False
    self.use_mnist_reward = False

    self.max_action = np.array([1., 1.])
    self.zero_speed_init = zero_speed_init
    self.dense_rewards = dense_rewards

    # Read table parameters directly from the xml.
    env_config = os.path.join(
        os.path.split(os.path.abspath(__file__))[0], env_config)
    params_xml = self.read_xml_parameters(xml_filename=env_config)
    self.space_size = params_xml['space_size']
    self.table_wall_width = params_xml['table_wall_width']
    self.goal_radius = params_xml['goal_radius']
    # self.space_size = np.array([1.0, 1.0, 1.0])
    self.camera_distance = 2.5 * params_xml['space_size'][0]

    # Initially set to False to prevent using normalization during
    # initialization.
    self.normalize_obs = False

    print('Pickle init ...')
    utils.EzPickle.__init__(self)
    print('Mujoco model init ...')
    mujoco_env.MujocoEnv.__init__(
        self, model_path=env_config, frame_skip=frame_skip)
    self.normalize_obs = normalize_obs
    # self.viewer = self._get_viewer()

    self.init_qpos = self.model.data.qpos.ravel().copy()
    self.init_qvel = np.zeros_like(self.model.data.qvel.ravel())
    self.init_qfrc = self.model.data.qfrc_applied.ravel().copy()

    action_low = np.array([-1., -1.])
    action_high = np.array([1., 1.])
    self.action_space = spaces.Box(action_low, action_high)

    if goal_tolerance is None:
        self.goal_tolerance = self.goal_radius
    self.get_obs_space()

    print('Pose: ', self.model.data.qpos.flatten())
    print('Vel: ', self.model.data.qvel.flatten())
    print('Com block: ', self.get_body_com("block"))
    print('Com goal: ', self.get_body_com("goal"))

    # self.goal_static = self.pose2goal(goal)  # Must be filled in first
    # since it will contain the full state
    # print('goal static: ', self.goal_static)
    self.goal = copy.deepcopy(self.goal_static)

    if self.tuple_obspace:
        self.observation_space = gym_tuple((self.observation_space, ))

    if self.spec is None:
        self.spec = gym_reg.EnvSpec(
            id='BlocksSimple-v0',
            max_episode_steps=self.max_episode_steps)

    self.max_action = np.linalg.norm(np.abs(self.action_space.high))
    print('Max action: ', self.max_action)
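# Illustrative usage sketch (the class name BlocksSimpleEnv is an assumption,
# since the class header is not shown in this snippet; actions are the Fx, Fy
# forces normalized to [-1, 1] as described in the docstring above):
#
#   env = BlocksSimpleEnv(time_limit=100, dense_rewards=True)
#   obs = env.reset()
#   obs, rew, done, info = env.step(np.array([0.5, -0.2]))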
def __init__(self,
             raw_control=True,
             raw_control_zero_middle=True,
             dim_mode='3D',
             tf_control=False,
             sim_steps=4,
             obs_repr="state_xyz_vxyz_rot_omega",
             ep_time=3,
             thrust_noise_ratio=0.,
             obstacles_num=0,
             room_size=10,
             init_random_state=False,
             rew_coeff=None):
    """
    @param obs_repr: options: state_xyz_vxyz_rot_omega,
        state_xyz_vxyz_quat_omega
    """
    np.seterr(under='ignore')

    self.init_random_state = init_random_state
    self.room_size = room_size
    self.max_init_vel = 1.
    self.max_init_omega = 2 * np.pi
    self.room_box = np.array(
        [[-self.room_size, -self.room_size, 0],
         [self.room_size, self.room_size, self.room_size]])
    self.obs_repr = obs_repr
    self.state_vector = getattr(self, obs_repr)

    self.dynamics = default_dynamics(
        sim_steps,
        room_box=self.room_box,
        dim_mode=dim_mode,
        noise_scale=thrust_noise_ratio)
    # self.controller = ShiftedMotorControl(self.dynamics)
    # self.controller = OmegaThrustControl(self.dynamics)  # The last one used
    # self.controller = VelocityYawControl(self.dynamics)
    self.scene = None

    if obstacles_num > 0:
        self.obstacles = _random_obstacles(
            None, obstacles_num, self.room_size, self.dynamics.arm)
    else:
        self.obstacles = None

    # self.oracle = NonlinearPositionController(self.dynamics)
    self.dim_mode = dim_mode
    if self.dim_mode == '1D':
        self.viewpoint = 'side'
    else:
        self.viewpoint = 'chase'

    if raw_control:
        if self.dim_mode == '1D':  # Z axis only
            self.controller = VerticalControl(
                self.dynamics, zero_action_middle=raw_control_zero_middle)
        elif self.dim_mode == '2D':  # X and Z axes only
            self.controller = VertPlaneControl(
                self.dynamics, zero_action_middle=raw_control_zero_middle)
        elif self.dim_mode == '3D':
            self.controller = RawControl(
                self.dynamics, zero_action_middle=raw_control_zero_middle)
        else:
            raise ValueError(
                'QuadEnv: Unknown dimensionality mode %s' % self.dim_mode)
    else:
        self.controller = NonlinearPositionController(
            self.dynamics, tf_control=tf_control)

    self.action_space = self.controller.action_space(self.dynamics)

    ## Former way to get the obs space:
    # # pos, vel, rot, rot vel
    # obs_dim = 3 + 3 + 9 + 3 + 3  # xyz, Vxyz, R, Omega, goal_xyz
    # # TODO: tighter bounds on some variables
    # obs_high = 100 * np.ones(obs_dim)
    # # rotation mtx guaranteed to be orthogonal
    # obs_high[6:6 + 9] = 1
    # self.observation_space = spaces.Box(-obs_high, obs_high)
    self.observation_space = self.get_observation_space()

    # TODO: get this from a wrapper
    self.ep_time = ep_time  # In seconds
    self.dt = 1.0 / 100.0
    self.sim_steps = sim_steps
    self.ep_len = int(self.ep_time / (self.dt * self.sim_steps))
    self.tick = 0
    self.crashed = False

    self._seed()

    # Size of the box from which the initial position will be randomly
    # sampled. If box_scale > 1.0 then it will also grow every episode.
    self.box = 2.0
    self.box_scale = 1.0  # Scale the initial box by this factor each episode

    #########################################
    ## REWARDS
    self.rew_coeff = {
        "pos": 1,
        "effort": 0.01,
        "crash": 1,
        "orient": 1,
        "vel_proj": 0,
        "spin_z": 1,
        "spin_xy": 0.5,
    }
    if rew_coeff is not None:
        assert isinstance(rew_coeff, dict)
        assert set(rew_coeff.keys()).issubset(set(self.rew_coeff.keys()))
        self.rew_coeff.update(rew_coeff)

    self._reset()

    if self.spec is None:
        self.spec = gym_reg.EnvSpec(id='Quadrotor-v0',
                                    max_episode_steps=self.ep_len)

    # Always call the Serializable constructor last.
    Serializable.quick_init(self, locals())
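# Illustrative usage sketch (the class name QuadrotorEnv is an assumption, as
# the class header is not shown here; the gym-style step loop is inferred from
# the action_space/observation_space attributes set above):
#
#   env = QuadrotorEnv(dim_mode='3D', raw_control=True, ep_time=3)
#   obs = env.reset()
#   for _ in range(env.ep_len):
#       obs, rew, done, info = env.step(env.action_space.sample())
#       if done:
#           break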
def __init__(self,
             raw_control=True,
             raw_control_zero_middle=True,
             dim_mode='3D',
             tf_control=False,
             sim_steps=4,
             obs_repr="state_xyz_vxyz_rot_omega",
             ep_time=3):
    """
    @param obs_repr: options: state_xyz_vxyz_rot_omega,
        state_xyz_vxyz_quat_omega
    """
    np.seterr(under='ignore')

    self.room_box = np.array([[-10, -10, 0], [10, 10, 10]])
    self.obs_repr = obs_repr
    self.state_vector = getattr(self, obs_repr)
    self.dynamics = default_dynamics(
        sim_steps, room_box=self.room_box, dim_mode=dim_mode)
    # self.controller = ShiftedMotorControl(self.dynamics)
    # self.controller = OmegaThrustControl(self.dynamics)  # The last one used
    # self.controller = VelocityYawControl(self.dynamics)
    self.scene = None
    # self.oracle = NonlinearPositionController(self.dynamics)

    self.dim_mode = dim_mode
    if self.dim_mode == '1D':
        self.viewpoint = 'side'
    else:
        self.viewpoint = 'chase'

    if raw_control:
        if self.dim_mode == '1D':
            self.controller = VerticalControl(
                self.dynamics,
                zero_action_middle=raw_control_zero_middle,
                dim_mode="1D")
        elif self.dim_mode == '2D':
            self.controller = VertPlaneControl(
                self.dynamics,
                zero_action_middle=raw_control_zero_middle,
                dim_mode="2D")
        elif self.dim_mode == '3D':
            self.controller = RawControl(
                self.dynamics, zero_action_middle=raw_control_zero_middle)
        else:
            raise ValueError(
                'QuadEnv: Unknown dimensionality mode %s' % self.dim_mode)
    else:
        self.controller = NonlinearPositionController(
            self.dynamics, tf_control=tf_control)

    self.action_space = self.controller.action_space(self.dynamics)

    ## Former way to get the obs space:
    # # pos, vel, rot, rot vel
    # obs_dim = 3 + 3 + 9 + 3 + 3  # xyz, Vxyz, R, Omega, goal_xyz
    # # TODO: tighter bounds on some variables
    # obs_high = 100 * np.ones(obs_dim)
    # # rotation mtx guaranteed to be orthogonal
    # obs_high[6:6 + 9] = 1
    # self.observation_space = spaces.Box(-obs_high, obs_high)
    self.observation_space = self.get_observation_space()

    # TODO: get this from a wrapper
    self.ep_time = ep_time  # In seconds
    self.dt = 1.0 / 100.0
    self.sim_steps = sim_steps
    self.ep_len = int(self.ep_time / (self.dt * self.sim_steps))
    self.tick = 0
    # self.dt = 1.0 / 50.0
    self.crashed = False

    self._seed()

    # Size of the box from which the initial position will be randomly
    # sampled. If box_scale > 1.0 then it will also grow every episode.
    self.box = 2.0
    self.box_scale = 1.0  # Scale the initial box by this factor each episode

    self._reset()

    if self.spec is None:
        self.spec = gym_reg.EnvSpec(id='Quadrotor-v0',
                                    max_episode_steps=self.ep_len)
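# Worked example of the episode-length computation above, using the default
# arguments (ep_time=3 s, dt=1/100 s, sim_steps=4):
#   ep_len = int(3 / (0.01 * 4)) = int(75.0) = 75 steps per episode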