def __init__(self,
             asset_path: str = DCLAW3_ASSET_PATH,
             observation_keys: Sequence[str] = DEFAULT_OBSERVATION_KEYS,
             frame_skip: int = 40,
             **kwargs):
    """Initializes the environment and its per-episode bookkeeping.

    Args:
        asset_path: The XML model file to load.
        observation_keys: The keys in `get_obs_dict` to concatenate as the
            observations returned by `step` and `reset`.
        frame_skip: The number of simulation steps per environment step.
        **kwargs: Any further keyword arguments, passed through unchanged
            to the parent initializer. `interactive` and
            `success_threshold` are not parameters of this method; if
            given, they only reach the parent through this pass-through.
    """
    # Resolve the model file first, then delegate to the parent class.
    model_path = get_asset_path(asset_path)
    super().__init__(
        sim_model=model_path,
        observation_keys=observation_keys,
        frame_skip=frame_skip,
        **kwargs)

    self._desired_claw_pos = RESET_POSE

    # The following are modified (possibly every reset) by subclasses.
    self._initial_object_pos = 0
    self._initial_object_vel = 0
def __init__(self,
             asset_path: str = DCLAW3_ASSET_PATH,
             observation_keys: Sequence[str] = DEFAULT_OBSERVATION_KEYS,
             frame_skip: int = 40,
             interactive: bool = False,
             **kwargs):
    """Initializes the environment, including the target body lookup.

    Args:
        asset_path: The XML model file to load.
        observation_keys: The keys in `get_obs_dict` to concatenate as the
            observations returned by `step` and `reset`.
        frame_skip: The number of simulation steps per environment step.
        interactive: If True, allows the hardware guide motor to freely
            rotate and its current angle is used as the goal.
        **kwargs: Any further keyword arguments, passed through unchanged
            to the parent initializer.
    """
    # Resolve the model file first, then delegate to the parent class.
    model_path = get_asset_path(asset_path)
    super().__init__(
        sim_model=model_path,
        observation_keys=observation_keys,
        frame_skip=frame_skip,
        **kwargs)

    self._interactive = interactive
    self._desired_claw_pos = RESET_POSE
    # Id of the model body named 'target'; looked up once here.
    self._target_bid = self.model.body_name2id('target')

    # The following are modified (possibly every reset) by subclasses.
    self._initial_object_pos = 0
    self._initial_object_vel = 0

    # Start with the target object at position 0.
    self._set_target_object_pos(0)
def __init__(self,
             asset_path: str = DKITTY_ASSET_PATH,
             observation_keys: Sequence[str] = DEFAULT_OBSERVATION_KEYS,
             target_tracker_id: Optional[Union[str, int]] = None,
             heading_tracker_id: Optional[Union[str, int]] = None,
             frame_skip: int = 40,
             upright_threshold: float = 0.9,
             upright_reward: float = 1,
             falling_reward: float = -500,
             **kwargs):
    """Initializes the environment and records the tracker ids.

    Args:
        asset_path: The XML model file to load.
        observation_keys: The keys in `get_obs_dict` to concatenate as the
            observations returned by `step` and `reset`.
        target_tracker_id: The device index or serial of the tracking
            device for the target location.
        heading_tracker_id: The device index or serial of the tracking
            device for the heading direction. This defaults to the target
            tracker.
        frame_skip: The number of simulation steps per environment step.
        upright_threshold: The threshold (in [0, 1]) above which the
            D'Kitty is considered to be upright. If the cosine similarity
            of the D'Kitty's z-axis with the global z-axis is below this
            threshold, the D'Kitty is considered to have fallen.
        upright_reward: The reward multiplier for uprightedness.
        falling_reward: The reward multiplier for falling.
        **kwargs: Any further keyword arguments, passed through unchanged
            to the parent initializer.
    """
    # The tracker ids are assigned before the parent initializer runs.
    self._target_tracker_id = target_tracker_id
    # Fall back to the target tracker when no heading tracker is given.
    self._heading_tracker_id = (
        target_tracker_id
        if heading_tracker_id is None else heading_tracker_id)

    model_path = get_asset_path(asset_path)
    super().__init__(
        sim_model=model_path,
        observation_keys=observation_keys,
        frame_skip=frame_skip,
        upright_threshold=upright_threshold,
        upright_reward=upright_reward,
        falling_reward=falling_reward,
        **kwargs)

    self._initial_target_pos = np.zeros(3)
    self._initial_heading_pos = None
def __init__(self,
             asset_path: str = DCLAW3_ASSET_PATH,
             observation_keys: Sequence[str] = DEFAULT_OBSERVATION_KEYS,
             frame_skip: int = 20,
             **kwargs):
    """Initializes the environment with zeroed position vectors.

    Args:
        asset_path: The XML model file to load.
        observation_keys: The keys in `get_obs_dict` to concatenate as the
            observations returned by `step` and `reset`.
        frame_skip: The number of simulation steps per environment step.
        **kwargs: Any further keyword arguments, passed through unchanged
            to the parent initializer.
    """
    # Resolve the model file first, then delegate to the parent class.
    model_path = get_asset_path(asset_path)
    super().__init__(
        sim_model=model_path,
        observation_keys=observation_keys,
        frame_skip=frame_skip,
        **kwargs)

    # Two independent length-9 zero vectors (deliberately not aliased).
    self._initial_pos = np.zeros(9)
    self._desired_pos = np.zeros(9)
def __init__(self,
             asset_path: str = DKITTY_ASSET_PATH,
             observation_keys: Sequence[str] = DEFAULT_OBSERVATION_KEYS,
             frame_skip: int = 40,
             upright_threshold: float = 0,  # cos(90deg)
             upright_reward: float = 2,
             falling_reward: float = -100,
             **kwargs):
    """Initializes the environment with zeroed pose vectors.

    Args:
        asset_path: The XML model file to load.
        observation_keys: The keys in `get_obs_dict` to concatenate as the
            observations returned by `step` and `reset`.
        frame_skip: The number of simulation steps per environment step.
        upright_threshold: The threshold (in [0, 1]) above which the
            D'Kitty is considered to be upright. If the cosine similarity
            of the D'Kitty's z-axis with the global z-axis is below this
            threshold, the D'Kitty is considered to have fallen.
        upright_reward: The reward multiplier for uprightedness.
        falling_reward: The reward multiplier for falling.
        **kwargs: Any further keyword arguments, passed through unchanged
            to the parent initializer. `device_path` and
            `torso_tracker_id` are not parameters of this method; if
            given, they only reach the parent through this pass-through.
    """
    # Resolve the model file first, then delegate to the parent class.
    model_path = get_asset_path(asset_path)
    super().__init__(
        sim_model=model_path,
        observation_keys=observation_keys,
        frame_skip=frame_skip,
        upright_threshold=upright_threshold,
        upright_reward=upright_reward,
        falling_reward=falling_reward,
        **kwargs)

    # Two independent length-12 zero vectors (deliberately not aliased).
    self._desired_pose = np.zeros(12)
    self._initial_pose = np.zeros(12)
def __init__(self,
             asset_path: str = DKITTY_ASSET_PATH,
             observation_keys: Sequence[str] = DEFAULT_OBSERVATION_KEYS,
             device_path: Optional[str] = None,
             torso_tracker_id: Optional[Union[str, int]] = None,
             frame_skip: int = 40,
             sticky_action_probability: float = 0.0,
             upright_threshold: float = 0.9,
             upright_reward: float = 1,
             falling_reward: float = -500,
             expose_last_action: bool = True,
             expose_upright: bool = True,
             robot_noise_ratio: float = 0.05,
             **kwargs):
    """Initializes the environment and assembles its observation keys.

    The last two entries of `observation_keys` are always dropped, after
    which "last_action" and "upright" are appended according to the
    `expose_*` flags.

    Args:
        asset_path: The XML model file to load.
        observation_keys: The keys in `get_obs_dict` to concatenate as the
            observations returned by `step` and `reset`.
        device_path: The device path to Dynamixel hardware. Not read by
            this initializer at present (the code using it is disabled).
        torso_tracker_id: The device index or serial of the tracking
            device for the D'Kitty torso. Not read by this initializer at
            present (the code using it is disabled).
        frame_skip: The number of simulation steps per environment step.
        sticky_action_probability: Repeat previous action with this
            probability. Default 0 (no sticky actions).
        upright_threshold: The threshold (in [0, 1]) above which the
            D'Kitty is considered to be upright. If the cosine similarity
            of the D'Kitty's z-axis with the global z-axis is below this
            threshold, the D'Kitty is considered to have fallen.
        upright_reward: The reward multiplier for uprightedness.
        falling_reward: The reward multiplier for falling.
        expose_last_action: If True, "last_action" is appended to the
            observation keys.
        expose_upright: If True, "upright" is appended to the observation
            keys.
        robot_noise_ratio: Not read by this initializer at present (the
            code using it is disabled).
        **kwargs: Any further keyword arguments, passed through unchanged
            to the parent initializer.
    """
    # The exposure flags are assigned before the parent initializer runs.
    self._expose_last_action = expose_last_action
    self._expose_upright = expose_upright

    # Strip the trailing two keys, then re-add only the requested ones.
    # NOTE(review): this presumes the supplied keys end with the two
    # optional entries -- confirm against DEFAULT_OBSERVATION_KEYS.
    selected_keys = observation_keys[:-2]
    if expose_last_action:
        selected_keys += ("last_action", )
    if expose_upright:
        selected_keys += ("upright", )

    # Disabled robot/tracker configuration, kept for reference:
    # robot_config = self.get_robot_config(device_path)
    # if 'sim_observation_noise' in robot_config.keys():
    #     robot_config['sim_observation_noise'] = robot_noise_ratio
    # ...and passed to the parent initializer as:
    #     robot_config=robot_config,
    #     tracker_config=self.get_tracker_config(
    #         torso=torso_tracker_id,
    #     ),
    model_path = get_asset_path(asset_path)
    super().__init__(
        sim_model=model_path,
        observation_keys=selected_keys,
        frame_skip=frame_skip,
        upright_threshold=upright_threshold,
        upright_reward=upright_reward,
        falling_reward=falling_reward,
        **kwargs)

    self._last_action = np.zeros(12)
    self._sticky_action_probability = sticky_action_probability
    self._time_step = 0