def observation_space(self):
    return Box(low=-np.ones(self.params['obs_len']),
               high=np.ones(self.params['obs_len']))
def observation_space(self):
    return Box(low=-np.inf, high=np.inf, shape=(7, ))
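# Usage sketch (illustrative, not part of the source): observation_space
# properties like the ones above return gym-style Box instances, which
# support sampling and membership checks. Assumes gym.spaces.Box; some
# snippets in this file use an rllab-style Box with the same constructor.
import numpy as np
from gym.spaces import Box

space = Box(low=-np.inf, high=np.inf, shape=(7, ))
sample = space.sample()        # unbounded dims are drawn from a normal
assert space.contains(sample)  # membership check against the bounds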
def _mod_hfield(self, hfield):
    # clear a flat patch for the robot to start off from
    return terrain.clear_patch(
        hfield, Box(np.array([-1.0, -1.0]), np.array([-0.5, -0.5])))
def action_space(self):
    return Box(low=np.array([-1, -1, -1]), high=np.array([1, 1, 1]))
def action_space(self): """allowed value range for actions""" return Box(low=-self.delta_l, high=self.delta_l, shape=(3, ))
class SawyerReachTorqueEnv(MujocoEnv, Serializable, MultitaskEnv):
    """Implements a torque-controlled Sawyer environment"""

    def __init__(
            self,
            frame_skip=10,
            action_scale=10,
            xyz_obs=False,
            keep_vel_in_obs=True,
            use_safety_box=False,
            fix_goal=False,
            fixed_goal=(0.05, 0.6, 0.15),
            reward_type='hand_distance',
            ctrl_cost_coef=0.0,
            indicator_threshold=.05,
            goal_low=None,
            goal_high=None,
    ):
        self.quick_init(locals())
        MultitaskEnv.__init__(self)
        self.action_scale = action_scale
        MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
        bounds = self.model.actuator_ctrlrange.copy()
        low = bounds[:, 0]
        high = bounds[:, 1]
        self.action_space = Box(low=low, high=high)
        if goal_low is None:
            goal_low = np.array([-0.1, 0.5, 0.02])
        else:
            goal_low = np.array(goal_low)
        if goal_high is None:
            goal_high = np.array([0.1, 0.7, 0.2])
        else:
            goal_high = np.array(goal_high)
        self.safety_box = Box(goal_low, goal_high)
        self.xyz_obs = xyz_obs
        self.keep_vel_in_obs = keep_vel_in_obs
        self.ctrl_cost_coef = ctrl_cost_coef
        self.goal_space = Box(goal_low, goal_high)
        obs_size = self._get_env_obs().shape[0]
        high = np.inf * np.ones(obs_size)
        low = -high
        self.obs_space = Box(low, high)
        self.achieved_goal_space = Box(-np.inf * np.ones(3),
                                       np.inf * np.ones(3))
        self._observation_space_dict = Dict([
            ('observation', self.obs_space),
            ('desired_goal', self.goal_space),
            ('achieved_goal', self.achieved_goal_space),
            ('state_observation', self.obs_space),
            ('state_desired_goal', self.goal_space),
            ('state_achieved_goal', self.achieved_goal_space),
        ])
        # obs space: (current_xyz_pos, desired_xyz_pos)
        self.observation_space = Box(np.concatenate([low, goal_low]),
                                     np.concatenate([high, goal_high]))
        self.fix_goal = fix_goal
        self.fixed_goal = np.array(fixed_goal)
        self.use_safety_box = use_safety_box
        self.prev_qpos = self.init_angles.copy()
        self.reward_type = reward_type
        self.indicator_threshold = indicator_threshold
        goal = self.sample_goal()
        self._state_goal = goal['state_desired_goal']
        self.reset()

    @property
    def model_name(self):
        return 'sawyer_reach_torque.xml'

    def reset_to_prev_qpos(self):
        angles = self.data.qpos.copy()
        velocities = self.data.qvel.copy()
        angles[:] = self.prev_qpos.copy()
        velocities[:] = 0
        self.set_state(angles.flatten(), velocities.flatten())
        self.set_goal_xyz(self._state_goal)

    def is_outside_box(self):
        pos = self.get_endeff_pos()
        return not self.safety_box.contains(pos)

    def set_to_qpos(self, qpos):
        angles = self.data.qpos.copy()
        velocities = self.data.qvel.copy()
        angles[:] = qpos
        velocities[:] = 0
        self.set_state(angles.flatten(), velocities.flatten())
        self.set_goal_xyz(self._state_goal)

    def viewer_setup(self):
        self.viewer.cam.trackbodyid = 0
        self.viewer.cam.distance = 1.0
        # 3rd person view
        cam_dist = 0.3
        rotation_angle = 270
        cam_pos = np.array([0, 1.0, 0.5, cam_dist, -45, rotation_angle])
        for i in range(3):
            self.viewer.cam.lookat[i] = cam_pos[i]
        self.viewer.cam.distance = cam_pos[3]
        self.viewer.cam.elevation = cam_pos[4]
        self.viewer.cam.azimuth = cam_pos[5]
        self.viewer.cam.trackbodyid = -1

    def step(self, action):
        action = action * self.action_scale
        self.do_simulation(action, self.frame_skip)
        if self.use_safety_box:
            if self.is_outside_box():
                self.reset_to_prev_qpos()
            else:
                self.prev_qpos = self.data.qpos.copy()
        obs_dict = self._get_obs_dict()
        info = self._get_info()
        reward = self.compute_reward(action, obs_dict)
        obs = self._convert_obs_dict_to_obs(obs_dict)
        done = False
        return obs, reward, done, info

    def done(self, obs):
        if obs.ndim == 2:
            return np.zeros(obs.shape[0], dtype=bool)
        else:
            return False

    def reward(self, obs, action, obs_next):
        if obs_next.ndim == 2 and action.ndim == 2:
            hand_pos = obs_next[:, 0:3]
            goals = obs_next[:, -3:]
            distance = np.linalg.norm(hand_pos - goals, axis=1)
            ctrl_cost = self.ctrl_cost_coef * np.sum(action**2, axis=1)
            if self.reward_type == 'hand_distance':
                r = -distance
            elif self.reward_type == 'hand_success':
                # 0 on success, -1 otherwise
                r = -(distance > self.indicator_threshold).astype(float)
            else:
                raise NotImplementedError("Invalid/no reward type.")
            return r - ctrl_cost
        else:
            return self.reward(np.array([obs]), np.array([action]),
                               np.array([obs_next]))[0]

    def _get_env_obs(self):
        if self.xyz_obs:
            if self.keep_vel_in_obs:
                return np.concatenate([
                    self.get_endeff_pos(),  # end-effector xyz position
                    self.get_endeff_rot(),  # end-effector rotation (quaternion)
                    self.get_endeff_vel(),  # end-effector xyz velocity
                ])
            else:
                return np.concatenate([
                    self.get_endeff_pos(),
                    self.get_endeff_rot(),
                ])
        else:
            if self.keep_vel_in_obs:
                return np.concatenate([
                    self.get_endeff_pos(),
                    self.sim.data.qpos.flat,
                    self.sim.data.qvel.flat,
                ])
            else:
                return np.concatenate([
                    self.get_endeff_pos(),
                    self.sim.data.qpos.flat,
                ])

    def _get_obs_dict(self):
        ee_pos = self.get_endeff_pos()
        state_obs = self._get_env_obs()
        return dict(
            observation=state_obs,
            desired_goal=self._state_goal,
            achieved_goal=ee_pos,
            state_observation=state_obs,
            state_desired_goal=self._state_goal,
            state_achieved_goal=ee_pos,
        )

    def _convert_obs_dict_to_obs(self, obs_dict):
        return np.concatenate(
            [obs_dict['observation'], obs_dict['desired_goal']])

    def _get_obs(self):
        return self._convert_obs_dict_to_obs(self._get_obs_dict())

    def _get_info(self):
        hand_distance = np.linalg.norm(self._state_goal -
                                       self.get_endeff_pos())
        return dict(
            hand_distance=hand_distance,
            hand_success=float(hand_distance < self.indicator_threshold),
        )

    def get_endeff_pos(self):
        return self.data.body_xpos[self.endeff_id].copy()

    def get_endeff_rot(self):
        return self.data.body_xquat[self.endeff_id].copy()

    def get_endeff_vel(self):
        return self.data.body_xvelp[self.endeff_id].copy()

    def reset(self, reset_args=None):
        angles = self.data.qpos.copy()
        velocities = self.data.qvel.copy()
        angles[:] = self.init_angles
        velocities[:] = 0
        self.set_state(angles.flatten(), velocities.flatten())
        goal = self.sample_goal()
        self._state_goal = goal['state_desired_goal']
        self.set_goal_xyz(self._state_goal)
        self.sim.forward()
        self.prev_qpos = self.data.qpos.copy()
        return self._get_obs()

    def set_goal_xyz(self, pos):
        qpos = self.data.qpos.flat.copy()
        qvel = self.data.qvel.flat.copy()
        qpos[7:10] = pos.copy()
        qvel[7:10] = [0, 0, 0]
        self.set_state(qpos, qvel)

    @property
    def init_angles(self):
        return [
            1.02866769e+00, -6.95207647e-01, 4.22932911e-01, 1.76670458e+00,
            -5.69637604e-01, 6.24117280e-01, 3.53404635e+00, 1.07586388e-02,
            6.62018003e-01, 2.09936716e-02, 1.00000000e+00, 3.76632959e-14,
            1.36837913e-11, 1.56567415e-23
        ]

    @property
    def endeff_id(self):
        return self.model.body_names.index('leftclaw')

    @property
    def goal_id(self):
        return self.model.body_names.index('goal')

    def get_diagnostics(self, paths, prefix=''):
        statistics = OrderedDict()
        for stat_name in [
                'hand_distance',
                'hand_success',
        ]:
            stat = get_stat_in_paths(paths, 'env_infos', stat_name)
            statistics.update(
                create_stats_ordered_dict(
                    '%s%s' % (prefix, stat_name),
                    stat,
                    always_show_all_stats=True,
                ))
            statistics.update(
                create_stats_ordered_dict(
                    'Final %s%s' % (prefix, stat_name),
                    [s[-1] for s in stat],
                    always_show_all_stats=True,
                ))
        return statistics

    """
    Multitask functions
    """

    @property
    def goal_dim(self) -> int:
        return 3

    def get_goal(self):
        return {
            'desired_goal': self._state_goal,
            'state_desired_goal': self._state_goal,
        }

    def sample_goals(self, batch_size):
        if self.fix_goal:
            goals = np.repeat(self.fixed_goal.copy()[None], batch_size, 0)
        else:
            goals = np.random.uniform(
                self.goal_space.low,
                self.goal_space.high,
                size=(batch_size, self.goal_space.low.size),
            )
        return {
            'desired_goal': goals,
            'state_desired_goal': goals,
        }

    def compute_rewards(self, actions, obs):
        achieved_goals = obs['achieved_goal']
        desired_goals = obs['desired_goal']
        hand_pos = achieved_goals
        goals = desired_goals
        ctrl_cost = self.ctrl_cost_coef * np.sum(np.abs(actions), axis=1)
        distances = np.linalg.norm(hand_pos - goals, axis=1)
        if self.reward_type == 'hand_distance':
            r = -distances
        elif self.reward_type == 'hand_success':
            # 0 on success, -1 otherwise
            r = -(distances > self.indicator_threshold).astype(float)
        else:
            raise NotImplementedError("Invalid/no reward type.")
        return r - ctrl_cost

    def set_to_goal(self, goal):
        raise NotImplementedError()

    def get_env_state(self):
        joint_state = self.sim.get_state()
        goal = self._state_goal.copy()
        return joint_state, goal

    def set_env_state(self, state):
        state, goal = state
        self.sim.set_state(state)
        self.sim.forward()
        self._state_goal = goal

    def log_diagnostics(self, paths):
        diagnostics = self.get_diagnostics(paths)
        logger.record_tabular('HandDistanceMean',
                              diagnostics['hand_distance Mean'])
        logger.record_tabular('FinalHandDistanceMean',
                              diagnostics['Final hand_distance Mean'])
        logger.record_tabular('FinalHandSuccessMean',
                              diagnostics['Final hand_success Mean'])
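# Usage sketch (illustrative, not part of the source): a short random rollout
# with SawyerReachTorqueEnv, assuming the multiworld-style dependencies
# (MujocoEnv, MultitaskEnv, Serializable, logger, ...) are importable.
env = SawyerReachTorqueEnv(fix_goal=True)
obs = env.reset()
for _ in range(100):
    action = env.action_space.sample()          # random joint torques
    obs, reward, done, info = env.step(action)  # reward <= 0 by construction
    if info['hand_success']:
        break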
def observation_space(self):
    high = np.full((26, ), 1e20)
    low = -high
    return Box(low, high)
def action_space(self):
    return Box(low=-0.3, high=0.3, shape=(15, ))
def observation_space(self):
    return Box(low=-np.ones(4), high=np.ones(4))
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_pos=(0, 0.6, 0.02),
        fix_goal=True,
        # 3D placing goal, for hand and object
        fixed_goal=(0, 0.85, 0.02, 0, 0.85, 0.02),
        height_target=0.1,
        goal_low=None,
        goal_high=None,
        hide_goal_markers=False,
        **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.max_path_length = 150
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.height_target = height_target
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.observation_space = Box(
        np.hstack((self.hand_and_obj_space.low,
                   np.array(GRIPPER_STATE_LIMIT[0]))),
        np.hstack((self.hand_and_obj_space.high,
                   np.array(GRIPPER_STATE_LIMIT[1]))),
    )
    self.goal_space = Box(goal_low, goal_high)
    self._observation_space_dict = Dict([
        ('observation', self.observation_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.observation_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
    self.observation_space = Box(
        np.concatenate(
            [self.observation_space.low, self.hand_and_obj_space.low]),
        np.concatenate(
            [self.observation_space.high, self.hand_and_obj_space.high]))
    self.reset()
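# Illustrative sketch: the Dict space built above groups the goal-conditioned
# views of the observation. Assuming gym.spaces.Dict semantics and an already
# constructed instance (hypothetical name `env`), sampling it yields one
# array per key:
sample = env._observation_space_dict.sample()
print(sorted(sample.keys()))
# ['achieved_goal', 'desired_goal', 'observation', 'state_achieved_goal', ...]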
def observation_space(self):
    return Box(low=-1e6, high=1e6, shape=(self.usedDim * self.window, ))
def observation_space(self):
    return Box(self.low, self.high)
def observation_space(self):
    return Box(low=-10 * np.ones(2), high=10 * np.ones(2))
def observation_space(self):
    return Box(np.array([-1, -1, 0]), np.array([1000, 100, np.inf]))
def observation_space(self):
    obs_limit = np.inf * np.ones(self.obs_dim)
    return Box(-obs_limit, obs_limit)
def action_space(self):
    return Box(low=-np.ones(2), high=np.ones(2))
def action_space(self):
    # return Box(low=-np.ones(2), high=np.ones(2))
    return Box(low=np.array([0.0, -1]), high=np.array([1.0, 1]))
def action_space(self):
    return Box(low=5.0 * self.mu1, high=5.0 * self.mu2, shape=(1, ))
def action_space(self):
    return Box(low=-1, high=1, shape=(self.n_size * self.n_size, ))
def observation_space(self):
    d = self.tensioner.max_displacement
    return Box(low=np.array([0, -d, -d, -d]),
               high=np.array([len(self.trajectory) + 1, d, d, d]))
def observation_space(self):
    '''Define the shape of the input vector to the neural network.'''
    return Box(low=-np.inf, high=np.inf, shape=(5, ))
def observation_space(self): """allowed value range for states""" return Box(low=-np.inf, high=np.inf, shape=(10, ))
def __init__(self,
             init_puck_low=[0.0, 0.6],
             init_puck_high=[0.0, 0.6],
             reward_type='hand_and_puck_distance',
             indicator_threshold=0.06,
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
             puck_goal_low=[-0.3, 0.55],
             puck_goal_high=[-0.1, 0.75],
             hide_goal_markers=False,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if init_puck_low is None:
        init_puck_low = self.hand_low[:2]
    if init_puck_high is None:
        init_puck_high = self.hand_high[:2]
    init_puck_low = np.array(init_puck_low)
    init_puck_high = np.array(init_puck_high)
    self.puck_low = init_puck_low
    self.puck_high = init_puck_high
    # Set goal sampling range
    if puck_goal_low is None:
        goal_low = np.hstack((self.hand_low, self.hand_low[:2]))
    else:
        goal_low = np.hstack((self.hand_low, puck_goal_low))
    if puck_goal_high is None:
        goal_high = np.hstack((self.hand_high, self.hand_high[:2]))
    else:
        goal_high = np.hstack((self.hand_high, puck_goal_high))
    self.hand_and_puck_goal_space = Box(goal_low, goal_high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, self.hand_low[0:2])),
        np.hstack((self.hand_high, self.hand_high[0:2])),
    )
    self.hand_space = Box(self.hand_low, self.hand_high)
    self._observation_space_dict = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = self.get_puck_pos()[2]
    self.observation_space = Box(
        np.concatenate([
            self.hand_and_puck_space.low, self.hand_and_puck_goal_space.low
        ]),
        np.concatenate([
            self.hand_and_puck_space.high, self.hand_and_puck_goal_space.high
        ]))
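# Example (illustrative; `env` is a hypothetical instance of the class above):
# the flat observation concatenates the hand-and-puck state with the goal, so
# it can be split back apart at the state dimension.
state_dim = env.hand_and_puck_space.low.size
flat_obs = env.observation_space.sample()
state, goal = flat_obs[:state_dim], flat_obs[state_dim:]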
def action_space(self):
    return Box(low=-0.1, high=0.1, shape=(2,))
def action_space(self):
    return Box(low=-1, high=1, shape=(3, ))
def latent_space(self):
    return Box(low=-np.inf, high=np.inf, shape=(1, ))
def action_space(self):
    return Box(low=self.act_limits_array[:, 0],
               high=self.act_limits_array[:, 1])
def test_box():
    space = Box(low=-1, high=1, shape=(2, 2))
    np.testing.assert_array_equal(space.flatten([[1, 2], [3, 4]]),
                                  [1, 2, 3, 4])
    np.testing.assert_array_equal(space.flatten_n([[[1, 2], [3, 4]]]),
                                  [[1, 2, 3, 4]])
    np.testing.assert_array_equal(space.unflatten([1, 2, 3, 4]),
                                  [[1, 2], [3, 4]])
    np.testing.assert_array_equal(space.unflatten_n([[1, 2, 3, 4]]),
                                  [[[1, 2], [3, 4]]])
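# Round-trip sketch (illustrative; assumes the rllab/garage-style Box whose
# flatten/unflatten methods are not part of gym's Box): flatten maps a sample
# to a 1-D vector and unflatten inverts it.
space = Box(low=-1, high=1, shape=(2, 2))
x = space.sample()
assert np.array_equal(space.unflatten(space.flatten(x)), x)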
def observation_space(self):
    return Box(low=-1, high=1,
               shape=(self.params['grid_n'], self.params['grid_m'], 3))