def __init__(self,
             action_scale=1,
             frame_skip=5,
             reward_type='vel_distance',
             indicator_threshold=.1,
             fixed_goal=5,
             fix_goal=False,
             max_speed=6):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fixed_goal = fixed_goal
    self.fix_goal = fix_goal
    self._state_goal = None
    self.goal_space = Box(np.array(-1 * max_speed), np.array(max_speed))
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.achieved_goal_space = Box(self.obs_space.low[8], self.obs_space.high[8])
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.achieved_goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.achieved_goal_space),
    ])
    self.reset()
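# Illustrative usage of the dict-observation API set up above (the class
# name below is hypothetical; the constructor belongs to a velocity-goal
# locomotion env in this module):
#
#     env = VelocityGoalEnv(fix_goal=True, fixed_goal=5)
#     obs = env.reset()
#     obs['observation']    # full state vector (unbounded Box)
#     obs['desired_goal']   # target velocity in [-max_speed, max_speed]
#     obs['achieved_goal']  # the scalar observation dimension at index 8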
def __init__(self,
             reward_type='hand_distance',
             norm_order=1,
             indicator_threshold=0.06,
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.3),
             hide_goal_markers=False,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_space),
        ('desired_goal', self.hand_space),
        ('achieved_goal', self.hand_space),
        ('state_observation', self.hand_space),
        ('state_desired_goal', self.hand_space),
        ('state_achieved_goal', self.hand_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
def __init__(self,
             action_scale=1,
             frame_skip=5,
             force=None,
             timestep_start=100000,
             timestep_end=100000):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    self.force = force
    self.timestep_start = timestep_start
    self.timestep_end = timestep_end
    self.step_count = 0
    self.reset()
def __init__(self,
             action_scale=1,
             frame_skip=5,
             obstacle_position=None,
             obstacle_height=None):
    self.quick_init(locals())
    self.obstacle_position = obstacle_position
    self.obstacle_height = obstacle_height
    print(self.obstacle_position, self.obstacle_height)
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    self.reset()
def __init__(self,
             frame_skip=30,
             action_scale=10,
             keep_vel_in_obs=True,
             use_safety_box=False,
             fix_goal=False,
             fixed_goal=(0.05, 0.6, 0.15),
             reward_type='hand_distance',
             indicator_threshold=.05,
             goal_low=None,
             goal_high=None,
             ):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    if goal_low is None:
        goal_low = np.array([-0.1, 0.5, 0.02])
    else:
        goal_low = np.array(goal_low)
    if goal_high is None:
        goal_high = np.array([0.1, 0.7, 0.2])
    else:
        # fixed: was np.array(goal_low), which silently ignored a
        # user-provided goal_high
        goal_high = np.array(goal_high)
    self.safety_box = Box(goal_low, goal_high)
    self.keep_vel_in_obs = keep_vel_in_obs
    self.goal_space = Box(goal_low, goal_high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low, high)
    self.achieved_goal_space = Box(-np.inf * np.ones(3), np.inf * np.ones(3))
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.achieved_goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.achieved_goal_space),
    ])
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.use_safety_box = use_safety_box
    self.prev_qpos = self.init_angles.copy()
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    goal = self.sample_goal()
    self._state_goal = goal['state_desired_goal']
    self.reset()
def __init__(
        self,
        goal_low=(-.25, .3, .12, -1.5708),
        goal_high=(.25, .6, .12, 0),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0, .45, .12, -.25),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.25, 0.3, .12),
        hand_high=(0.25, 0.6, .12),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=-1.5708,
        max_angle=0,
        xml_path='sawyer_xyz/sawyer_door_pull.xml',
        **sawyer_xyz_kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(
        self,
        self.model_name,
        hand_low=hand_low,
        hand_high=hand_high,
        **sawyer_xyz_kwargs
    )
    MultitaskEnv.__init__(self)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(np.array(goal_low), np.array(goal_high))
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]))
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.state_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    self.reset()
def __init__(self,
             obj_low=None,
             obj_high=None,
             reward_type='hand_and_obj_distance',
             indicator_threshold=0.06,
             obj_init_pos=(0, 0.65, 0.03),
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
             goal_low=None,
             goal_high=None,
             hide_goal_markers=False,
             hide_arm=False,
             num_objects=1,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.hide_arm = hide_arm
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    self.num_objects = num_objects
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, np.tile(obj_low, num_objects)))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, np.tile(obj_high, num_objects)))
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, np.tile(obj_low, num_objects))),
        np.hstack((self.hand_high, np.tile(obj_high, num_objects))),
    )
    self.observation_space = Dict([
        ('observation', self.hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
def __init__(
        self,
        frame_skip=30,
        goal_low=(-.1, .5, .06, 0),
        goal_high=(.1, .6, .06, .5),
        action_reward_scale=0,
        pos_action_scale=1 / 100,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.3, 0),
        target_pos_scale=0.25,
        target_angle_scale=1,
        max_x_pos=.1,
        max_y_pos=.7,
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(
        np.array(goal_low),
        np.array(goal_high),
    )
    self._state_goal = None
    self.action_space = Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]))
    max_angle = 1.5708
    self.state_space = Box(
        np.array([-1, -1, -1, -max_angle]),
        np.array([1, 1, 1, max_angle]),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.state_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.state_space),
        ('state_achieved_goal', self.state_space),
    ])
    self._pos_action_scale = pos_action_scale
    self.target_pos_scale = target_pos_scale
    self.action_reward_scale = action_reward_scale
    self.target_angle_scale = target_angle_scale
    self.max_x_pos = max_x_pos
    self.max_y_pos = max_y_pos
    self.min_y_pos = .5
    self.reset()
    self.reset_mocap_welds()
def __init__(self,
             puck_low=None,
             puck_high=None,
             reward_type='hand_and_puck_distance',
             indicator_threshold=0.06,
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
             goal_low=None,
             goal_high=None,
             hide_goal_markers=False,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    goal_low = np.array(goal_low)
    goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
    )
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = self.get_puck_pos()[2]
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_pos=(0, 0.6, 0.02),
        fix_goal=True,
        fixed_goal=(0, 0.85, 0.12, 0.1),  # 3D placing goal, followed by height target for picking
        goal_low=None,
        goal_high=None,
        hide_goal_markers=False,
        **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.max_path_length = 200
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.observation_space = Dict([
        ('observation', self.hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
    self.reset()
def step(self, action):
    self._act(action)
    observation = self._get_obs()
    reward = MultitaskEnv.compute_reward(self, action, observation)
    info = self._get_info()
    done = False
    return observation, reward, done, info
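# Note that the reward is computed from the (action, observation-dict)
# pair rather than from private simulator state. This is what makes goal
# relabeling (e.g. HER) possible: a stored transition can be re-scored
# under a different goal. Hedged sketch, assuming `env` is a MultitaskEnv
# subclass and `new_goal` lies in the 'desired_goal' space:
#
#     relabeled = dict(observation)
#     relabeled['desired_goal'] = new_goal
#     new_reward = env.compute_reward(action, relabeled)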
def __init__(
        self,
        frame_skip=50,
        goal_low=-.5,
        goal_high=.5,
        pos_action_scale=1 / 100,
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=0.02,
        fix_goal=False,
        fixed_goal=.25,
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array([fixed_goal])
    self.goal_space = Box(np.array([goal_low]), np.array([goal_high]))
    self._state_goal = None
    self.action_space = Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]))
    max_angle = 1.5708
    self.state_space = Box(
        np.array([-1, -1, -1, -max_angle]),
        np.array([1, 1, 1, max_angle]),
    )
    self.angle_space = Box(np.array([-max_angle]), np.array([max_angle]))
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.angle_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.angle_space),
    ])
    self._pos_action_scale = pos_action_scale
    self.action_reward_scale = action_reward_scale
    self.reset()
    self.reset_mocap_welds()
def __init__(self, action_scale=1, frame_skip=5):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.action_scale = action_scale
    MujocoEnv.__init__(self, self.model_name, frame_skip=frame_skip)
    bounds = self.model.actuator_ctrlrange.copy()
    low = bounds[:, 0]
    high = bounds[:, 1]
    self.action_space = Box(low=low, high=high)
    obs_size = self._get_env_obs().shape[0]
    high = np.inf * np.ones(obs_size)
    low = -high
    self.obs_space = Box(low=low, high=high)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('state_observation', self.obs_space),
    ])
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_positions=((0, 0.6, 0.02),),
        random_init=False,
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        hide_goal_markers=False,
        oracle_reset_prob=0.0,
        presampled_goals=None,
        num_goals_presampled=1000,
        p_obj_in_hand=.75,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        model_name=self.model_name,
        **kwargs
    )
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.random_init = random_init
    self.p_obj_in_hand = p_obj_in_hand
    self.indicator_threshold = indicator_threshold
    self.obj_init_z = obj_init_positions[0][2]
    self.obj_init_positions = np.array(obj_init_positions)
    self.last_obj_pos = self.obj_init_positions[0]
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.reset_free = reset_free
    self.oracle_reset_prob = oracle_reset_prob
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    self.gripper_and_hand_and_obj_space = Box(
        np.hstack(([0.0], self.hand_low, obj_low)),
        np.hstack(([0.04], self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.hand_reset_pos = np.array([0, .6, .2])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        # fixed: was `.values` without call parentheses, which raised at runtime
        self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
    else:
        # presampled_goals will be created when sample_goal is first called
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.reset()
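# The branch above defers goal presampling: when no goals are passed in,
# the first call to sample_goal builds `num_goals_presampled` goals once,
# and later samples index into that cache. Hedged sketch of the consumer
# side (the exact helper is an assumption, not this class's API):
#
#     idx = np.random.randint(env.num_goals_presampled)
#     goal = {k: v[idx] for k, v in env._presampled_goals.items()}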
def __init__(self,
             puck_low=(-.4, .2),
             puck_high=(.4, 1),
             reward_type='state_distance',
             norm_order=1,
             indicator_threshold=0.06,
             hand_low=(-0.28, 0.3, 0.05),
             hand_high=(0.28, 0.9, 0.3),
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
             goal_low=(-0.25, 0.3, 0.02, -.2, .4),
             goal_high=(0.25, 0.875, 0.02, .2, .8),
             hide_goal_markers=False,
             init_puck_z=0.02,
             init_hand_xyz=(0, 0.4, 0.07),
             reset_free=False,
             xml_path='sawyer_xyz/sawyer_push_puck.xml',
             clamp_puck_on_step=False,
             puck_radius=.07,
             **kwargs):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          model_name=self.model_name,
                          **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]),
                            np.array([1, 1, 1]),
                            dtype=np.float32)
    self.hand_and_puck_space = Box(np.hstack((self.hand_low, puck_low)),
                                   np.hstack((self.hand_high, puck_high)),
                                   dtype=np.float32)
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self.init_hand_xyz = np.array(init_hand_xyz)
    self._set_puck_xy(self.sample_puck_xy())
    self.reset_free = reset_free
    self.reset_counter = 0
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.clamp_puck_on_step = clamp_puck_on_step
    self.puck_radius = puck_radius
    self.reset()
def __init__(self,
             goal_low=(0,),
             goal_high=(1.0472,),
             action_reward_scale=0,
             reward_type='angle_difference',
             indicator_threshold=(.02, .03),
             fix_goal=False,
             fixed_goal=(-.25),  # note: parenthesized scalar, not a 1-tuple
             reset_free=False,
             fixed_hand_z=0.12,
             hand_low=(-0.1, 0.45, 0.15),
             hand_high=(0., 0.65, .225),
             target_pos_scale=1,
             target_angle_scale=1,
             min_angle=0,
             max_angle=1.0472,
             xml_path='sawyer_xyz/sawyer_door_pull_hook.xml',
             **sawyer_xyz_kwargs):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(self,
                          self.model_name,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          **sawyer_xyz_kwargs)
    MultitaskEnv.__init__(self)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(np.array(goal_low), np.array(goal_high), dtype=np.float32)
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]), dtype=np.float32)
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
        dtype=np.float32,
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    # Ensure the env does not start in a weird position: reset once with
    # reset_free enabled, then restore the requested setting.
    self.reset_free = True
    self.reset()
    self.reset_free = reset_free
def __init__(
        self,
        puck_low=(-.35, .25),
        puck_high=(.35, .95),
        norm_order=1,
        indicator_threshold=0.06,
        touch_threshold=0.1,  # chosen empirically after a few runs, by inspecting a histogram
        hand_low=(-0.28, 0.3, 0.05),
        hand_high=(0.28, 0.9, 0.3),
        fix_goal=True,
        fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
        goal_low=(-0.25, 0.3, 0.02, -0.2, .4),
        goal_high=(0.25, 0.875, 0.02, .2, .8),
        init_puck_z=0.035,
        init_hand_xyz=(0, 0.4, 0.07),
        reset_free=False,
        xml_path='sawyer_xyz/sawyer_push_puck.xml',
        clamp_puck_on_step=False,
        puck_radius=.07,
        **kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    # Several settings may be overridden through kwargs; they are popped
    # here so the remainder can be forwarded to SawyerXYZEnv.
    self.goal_type = kwargs.pop('goal_type', 'puck')
    self.dense_reward = kwargs.pop('dense_reward', False)
    self.indicator_threshold = kwargs.pop('goal_tolerance', indicator_threshold)
    self.fixed_goal = np.array(kwargs.pop('goal', fixed_goal))
    self.task_agnostic = kwargs.pop('task_agnostic', False)
    self.init_puck_xy = np.array(kwargs.pop('init_puck_xy', [0, 0.6]))
    self.init_hand_xyz = np.array(kwargs.pop('init_hand_xyz', init_hand_xyz))
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    self.puck_low = np.array(puck_low)
    self.puck_high = np.array(puck_high)
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.norm_order = norm_order
    self.touch_threshold = touch_threshold
    self.fix_goal = fix_goal
    self._state_goal = None
    self.hide_goal_markers = self.task_agnostic
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]), dtype=np.float32)
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
        dtype=np.float32
    )
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self._set_puck_xy(self.sample_puck_xy())
    self.reset_free = reset_free
    self.reset_counter = 0
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.clamp_puck_on_step = clamp_puck_on_step
    self.puck_radius = puck_radius
    self.reset()
def __init__(self,
             reward_type='dense',
             count_bonus_coeff=None,
             n_bins=16,
             target_radius=0.5,  # required distance to goal when using sparse reward
             norm_order=2,
             frame_skip=5,
             two_frames=False,
             vel_in_state=True,
             ant_low=list([-6, -6]),
             ant_high=list([6, 6]),
             goal_low=list([-6, -6]),
             goal_high=list([6, 6]),
             model_path='classic_mujoco/normal_gear_ratio_ant.xml',
             use_low_gear_ratio=False,
             goal_is_xy=False,
             goal_is_qpos=False,
             init_qpos=None,
             fixed_goal=None,
             diagnostics_goal=None,
             init_xy_mode='corner',
             terminate_when_unhealthy=False,
             healthy_z_range=(0.2, 0.9),
             done_penalty=0,
             goal_sampling_strategy='uniform',
             presampled_goal_paths='',
             *args,
             **kwargs):
    assert init_xy_mode in {
        'corner',
        'sample-uniformly-xy-space',
        'sample-from-goal-space',  # soon to be deprecated
    }
    assert not goal_is_xy or not goal_is_qpos
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    if use_low_gear_ratio:
        model_path = "classic_mujoco/ant_maze_gear30_small_dt3_with_invis.xml"
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path(model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    if goal_is_xy:
        assert reward_type.startswith('xy')
    if init_qpos is not None:
        self.init_qpos[:len(init_qpos)] = np.array(init_qpos)
    self.action_space = Box(-np.ones(8), np.ones(8), dtype=np.float32)
    self.reward_type = reward_type
    self.count_bonus_coeff = count_bonus_coeff
    self.n_bins = n_bins
    self.bin_counts = np.ones((self.n_bins, self.n_bins))
    self.target_radius = target_radius
    self.norm_order = norm_order
    self.goal_is_xy = goal_is_xy
    self.goal_is_qpos = goal_is_qpos
    self.fixed_goal = fixed_goal
    self.diagnostics_goal = diagnostics_goal
    print(f"[Ant] Diagnostics goal: {self.diagnostics_goal}")
    self.init_xy_mode = init_xy_mode
    self.terminate_when_unhealthy = terminate_when_unhealthy
    self._done_penalty = done_penalty
    self._healthy_z_range = healthy_z_range
    self.model_path = model_path
    assert goal_sampling_strategy in {'uniform', 'preset1', 'presampled'}
    self.goal_sampling_strategy = goal_sampling_strategy
    if self.goal_sampling_strategy == 'presampled':
        assert presampled_goal_paths is not None
        if not osp.exists(presampled_goal_paths):
            presampled_goal_paths = get_asset_full_path(presampled_goal_paths)
        self.presampled_goals = np.load(presampled_goal_paths)
    else:
        self.presampled_goals = None
    self.ant_low, self.ant_high = np.array(ant_low), np.array(ant_high)
    goal_low, goal_high = np.array(goal_low), np.array(goal_high)
    self.two_frames = two_frames
    self.vel_in_state = vel_in_state
    # With velocities in the state, the non-xy part of the state has 27
    # dimensions; without them, 13.
    if self.vel_in_state:
        obs_space_low = np.concatenate((self.ant_low, -np.ones(27)))
        obs_space_high = np.concatenate((self.ant_high, np.ones(27)))
        if goal_is_xy:
            goal_space_low = goal_low
            goal_space_high = goal_high
        else:
            goal_space_low = np.concatenate((goal_low, -np.ones(27)))
            goal_space_high = np.concatenate((goal_high, np.ones(27)))
    else:
        obs_space_low = np.concatenate((self.ant_low, -np.ones(13)))
        obs_space_high = np.concatenate((self.ant_high, np.ones(13)))
        if goal_is_xy:
            goal_space_low = goal_low
            goal_space_high = goal_high
        else:
            goal_space_low = np.concatenate((goal_low, -np.ones(13)))
            goal_space_high = np.concatenate((goal_high, np.ones(13)))
    if self.two_frames:
        self.obs_space = Box(np.concatenate((obs_space_low, obs_space_low)),
                             np.concatenate((obs_space_high, obs_space_high)),
                             dtype=np.float32)
        self.goal_space = Box(np.concatenate((goal_space_low, goal_space_low)),
                              np.concatenate((goal_space_high, goal_space_high)),
                              dtype=np.float32)
    else:
        self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
        self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    qpos_space = Box(-10 * np.ones(15), 10 * np.ones(15))
    spaces = [
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ]
    if self.goal_is_xy:
        self.xy_obs_space = Box(goal_low, goal_high, dtype=np.float32)
        spaces += [
            ('xy_observation', self.xy_obs_space),
            ('xy_desired_goal', self.xy_obs_space),
            ('xy_achieved_goal', self.xy_obs_space),
        ]
    if self.goal_is_qpos:
        spaces += [
            ('qpos_desired_goal', qpos_space),
            ('qpos_achieved_goal', qpos_space),
        ]
    self.observation_space = Dict(spaces)
    self._full_state_goal = None
    self._xy_goal = None
    self._qpos_goal = None
    self._prev_obs = None
    self._cur_obs = None
def __init__(self,
             block_low=(-0.2, 0.55, 0.02),
             block_high=(0.2, 0.75, 0.02),
             hand_low=(0.0, 0.55, 0.3),
             hand_high=(0.0, 0.55, 0.3),
             stack_goal_low=(-0.2, 0.55, 0.02),
             stack_goal_high=(0.2, 0.75, 0.02),
             fix_goal=False,
             fixed_stack_goal=(0.0, 0.55, 0.02),
             fixed_hand_goal=(0.0, 0.75, 0.3),
             use_sparse_reward=False,
             sparse_reward_threshold=0.05,
             reset_free=False,
             hide_goal_markers=False,
             oracle_reset_prob=0.0,
             xml_path='sawyer_xyz/three_blocks.xml',
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=get_asset_full_path(xml_path), **kwargs)
    self.block_low = np.array(block_low)
    self.block_high = np.array(block_high)
    self.block_radius = 0.02
    self.hand_low = np.array(hand_low)
    self.hand_high = np.array(hand_high)
    self.stack_goal_low = np.array(stack_goal_low)
    self.stack_goal_high = np.array(stack_goal_high)
    self.max_place_distance = max(
        np.linalg.norm((self.stack_goal_high - self.block_low), ord=2),
        np.linalg.norm((self.block_high - self.stack_goal_low), ord=2),
    )
    self.fix_goal = fix_goal
    self.fixed_stack_goal = np.array(fixed_stack_goal)
    self.use_sparse_reward = use_sparse_reward
    self.sparse_reward_threshold = sparse_reward_threshold
    self.reset_free = reset_free
    self.hide_goal_markers = hide_goal_markers
    self.oracle_reset_prob = oracle_reset_prob
    self.action_space = Box(np.array([-1, -1, -1, -1]),
                            np.array([1, 1, 1, 1]),
                            dtype=np.float32)
    # Split the block x-range into three equal bins, one per block, so
    # sampled resets place the blocks left-to-right without overlap.
    diff = (self.block_high[0] - self.block_low[0]) / 3.0
    midpoint = diff + self.block_low[0]
    block_one_high = [midpoint, *self.block_high[1:]]
    block_two_low = [midpoint, *self.block_low[1:]]
    midpoint = midpoint + diff
    block_two_high = [midpoint, *self.block_high[1:]]
    block_three_low = [midpoint, *self.block_low[1:]]
    self.sampling_space = Box(
        np.hstack(([0.0], self.hand_low, self.block_low, block_two_low, block_three_low)),
        np.hstack(([0.04], self.hand_high, block_one_high, block_two_high, self.block_high)),
        dtype=np.float32)
    self.gripper_and_hand_and_blocks_space = Box(
        np.hstack(([0.0], self.hand_low, self.block_low, self.block_low, self.block_low)),
        np.hstack(([0.04], self.hand_high, self.block_high, self.block_high, self.block_high)),
        dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_blocks_space),
        ('desired_goal', self.gripper_and_hand_and_blocks_space),
        ('achieved_goal', self.gripper_and_hand_and_blocks_space),
        ('state_observation', self.gripper_and_hand_and_blocks_space),
        ('state_desired_goal', self.gripper_and_hand_and_blocks_space),
        ('state_achieved_goal', self.gripper_and_hand_and_blocks_space),
        ('proprio_observation', self.gripper_and_hand_and_blocks_space),
        ('proprio_desired_goal', self.gripper_and_hand_and_blocks_space),
        ('proprio_achieved_goal', self.gripper_and_hand_and_blocks_space),
    ])
    self.block_one_id = self.model.get_joint_qpos_addr("blockOneJoint")
    self.block_two_id = self.model.get_joint_qpos_addr("blockTwoJoint")
    self.block_three_id = self.model.get_joint_qpos_addr("blockThreeJoint")
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.02,
        fix_goal=False,
        fixed_goal=None,
        goal_low=None,
        goal_high=None,
        hard_goals=False,
        hide_goal_markers=True,
        presampled_goals=None,
        num_goals_presampled=1000,
        norm_order=2,
        structure='2d',
        two_obj=False,
        frame_skip=100,
        reset_p=None,
        goal_p=None,
        fixed_reset=None,
        hide_state_markers=True,
        test_mode_case_num=None,
        **kwargs
):
    self.quick_init(locals())
    # self.obj_id[0] for the first object, self.obj_id[1] for the second object.
    self.obj_id = {
        0: '',
        1: '2',
    }
    assert structure in ['2d', '3d', '2d_wall_short', '2d_wall_tall', '2d_wall_tall_dark']
    self.structure = structure
    self.two_obj = two_obj
    self.hard_goals = hard_goals
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        model_name=self.model_name,
        frame_skip=frame_skip,
        **kwargs
    )
    self.norm_order = norm_order
    if obj_low is None:
        obj_low = np.copy(self.hand_low)
        obj_low[2] = 0.0
    if obj_high is None:
        obj_high = np.copy(self.hand_high)
    self.obj_low = np.array(obj_low)
    self.obj_high = np.array(obj_high)
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.subgoals = None
    self.fix_goal = fix_goal
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.hide_state_markers = hide_state_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    if self.two_obj:
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, obj_low)),
            np.hstack((self.hand_high, obj_high, obj_high)),
            dtype=np.float32
        )
    else:
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
            dtype=np.float32
        )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    if self.two_obj:
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, obj_low, [0.0])),
            np.hstack((self.hand_high, obj_high, obj_high, [0.07])),
            dtype=np.float32
        )
    else:
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, [0.0])),
            np.hstack((self.hand_high, obj_high, [0.07])),
            dtype=np.float32
        )
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.gripper_and_hand_and_obj_space),
        ('achieved_goal', self.gripper_and_hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.gripper_and_hand_and_obj_space),
        ('state_achieved_goal', self.gripper_and_hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        # fixed: was `.values` without call parentheses, which raised at runtime
        self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
    else:
        # if num_goals_presampled > 0, presampled_goals will be created
        # when sample_goal is first called
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.obj_radius = np.array([0.018, 0.016])  # was 0.020
    self.ee_radius = np.array([0.053, 0.058])  # was 0.065
    if self.structure == '2d_wall_short':
        self.wall_radius = np.array([0.03, 0.02])
        self.wall_center = np.array([0.0, 0.60, 0.02])
    elif self.structure in ['2d_wall_tall', '2d_wall_tall_dark']:
        self.wall_radius = np.array([0.015, 0.04])
        self.wall_center = np.array([0.0, 0.60, 0.04])
    else:
        self.wall_radius = None
        self.wall_center = None
    if fixed_reset is not None:
        fixed_reset = np.array(fixed_reset)
    self.fixed_reset = fixed_reset
    if fixed_goal is not None:
        fixed_goal = np.array(fixed_goal)
    self.fixed_goal = fixed_goal
    self.reset_p = reset_p
    self.goal_p = goal_p
    self.test_mode_case_num = test_mode_case_num
    if self.test_mode_case_num == 1:
        self.fixed_reset = np.array([0.0, 0.50, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.50, 0.10, 0.0, 0.70, 0.03, 0.0, 0.70, 0.015])
    elif self.test_mode_case_num == 2:
        self.fixed_reset = np.array([0.0, 0.50, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.70, 0.10, 0.0, 0.50, 0.015, 0.0, 0.50, 0.03])
    elif self.test_mode_case_num == 3:
        self.fixed_reset = np.array([0.0, 0.60, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.60, 0.05, 0.0, 0.70, 0.015, 0.0, 0.50, 0.015])
    if presampled_goals is not None:
        self.reset()
    else:
        # reset once with a single presampled goal so that reset() does not
        # trigger the full (expensive) goal presampling during construction
        self.num_goals_presampled = 1
        self.reset()
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
def __init__(self,
             # Room
             room,
             # Start and Goal
             start_config="all",
             goal_config="all",
             # Reward
             potential_type='',
             shaped=False,
             base_reward='',
             # State and Goal Representations
             use_state_images=False,
             use_goal_images=False,
             image_resolution=64,
             # Time Limits
             max_path_length=None,
             *args,
             **kwargs
             ):
    # Initialize
    Serializable.quick_init(self, locals())
    MultitaskEnv.__init__(self)

    # Environment Configuration
    self._room = room
    model = self.room.get_mjcmodel()
    self.possible_positions = self.room.XY(n=50)
    with model.asfile() as f:
        MujocoEnv.__init__(self, f.name, frame_skip=self.FRAME_SKIP)

    # Initialization
    self.start_config = start_config
    self.baseline_start = self.room.get_start()
    self.start = np.zeros_like(self.baseline_start)
    self.goal_config = goal_config
    self.baseline_goal = self.room.get_target()
    self.goal = np.zeros_like(self.baseline_goal)

    # Time Limit
    self.curr_path_length = 0
    if max_path_length is None:
        self.max_path_length = self.MAX_PATH_LENGTH
    else:
        self.max_path_length = max_path_length

    # Reward Functions
    self.potential_type = potential_type
    self.shaped = shaped
    self.base_reward = base_reward

    # Action Space
    bounds = self.model.actuator_ctrlrange.copy()
    self.action_space = Box(low=bounds[:, 0], high=bounds[:, 1])
    self.use_state_images = use_state_images
    self.use_goal_images = use_goal_images
    self.image_resolution = image_resolution

    # Observation Space
    example_state_obs = self._get_env_obs()
    if self.use_state_images:
        example_obs = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
    else:
        example_obs = example_state_obs
    # fixed: was example_obs.shape, which made the state space image-shaped
    # whenever use_state_images=True; this mirrors the goal-space logic below
    state_obs_shape = example_state_obs.shape
    obs_shape = example_obs.shape
    self.obs_space = Box(-1 * np.ones(obs_shape), np.ones(obs_shape))
    self.state_obs_space = Box(-1 * np.ones(state_obs_shape), np.ones(state_obs_shape))

    # Goal Space
    example_state_goal = self._get_env_achieved_goal(example_state_obs)
    if self.use_goal_images:
        example_goal = self.get_image(self.image_resolution, self.image_resolution, camera_name='topview')
    else:
        example_goal = example_state_goal
    state_goal_shape = example_state_goal.shape
    goal_shape = example_goal.shape
    self.goal_space = Box(-1 * np.ones(goal_shape), np.ones(goal_shape))
    self.state_goal_space = Box(-1 * np.ones(state_goal_shape), np.ones(state_goal_shape))

    # Final Setup
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.state_obs_space),
        ('state_desired_goal', self.state_goal_space),
        ('state_achieved_goal', self.state_goal_space),
    ])
    self.reset()
def __init__(self,
             reward_type='dense',
             norm_order=2,
             action_scale=0.15,
             frame_skip=100,
             ball_low=(-4.0, -4.0),
             ball_high=(4.0, 4.0),
             goal_low=(-4.0, -4.0),
             goal_high=(4.0, 4.0),
             model_path='pointmass_u_wall_big.xml',
             reset_low=None,
             reset_high=None,
             v_func_heatmap_bounds=(-2.0, 0.0),
             *args,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path('pointmass/' + model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]), dtype=np.float32)
    self.action_scale = action_scale
    self.ball_radius = 0.50
    self.walls = [
        Wall(0, 1.0, 2.5, 0.5, self.ball_radius),
        Wall(2.0, -0.5, 0.5, 2.0, self.ball_radius),
        Wall(-2.0, -0.5, 0.5, 2.0, self.ball_radius),
    ]
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.ball_low, self.ball_high = np.array(ball_low), np.array(ball_high)
    self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)
    if reset_low is None:
        self.reset_low = np.array(ball_low)
    else:
        self.reset_low = np.array(reset_low)
    if reset_high is None:
        self.reset_high = np.array(ball_high)
    else:
        self.reset_high = np.array(reset_high)
    obs_space_low = np.copy(self.ball_low)
    obs_space_high = np.copy(self.ball_high)
    goal_space_low = np.copy(self.goal_low)
    goal_space_high = np.copy(self.goal_high)
    self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
    self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.obs_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.obs_space),
        ('proprio_observation', self.obs_space),
        ('proprio_desired_goal', self.goal_space),
        ('proprio_achieved_goal', self.obs_space),
    ])
    self.v_func_heatmap_bounds = v_func_heatmap_bounds
    self._state_goal = None
    self.reset()
def __init__(self,
             reward_type='state_distance',
             norm_order=2,
             action_scale=20,
             frame_skip=3,
             two_frames=False,
             vel_in_state=True,
             z_in_state=True,
             car_low=(-1.90, -1.90),
             car_high=(1.90, 1.90),
             goal_low=(-1.90, -1.90),
             goal_high=(1.90, 1.90),
             model_path='wheeled_car.xml',
             reset_low=None,
             reset_high=None,
             *args,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    MujocoEnv.__init__(self,
                       model_path=get_asset_full_path('locomotion/' + model_path),
                       frame_skip=frame_skip,
                       **kwargs)
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]), dtype=np.float32)
    self.action_scale = action_scale

    # set radius
    if model_path in ['wheeled_car.xml', 'wheeled_car_u_wall.xml']:
        self.car_radius = np.sqrt(2) * 0.34
    elif model_path == 'wheeled_car_old.xml':
        self.car_radius = 0.30
    else:
        raise NotImplementedError

    # set walls
    if model_path == 'wheeled_car_u_wall.xml':
        self.walls = [
            Wall(0, -0.5, 0.75, 0.05, self.car_radius),
            Wall(0.70, 0.05, 0.05, 0.5, self.car_radius),
            Wall(-0.70, 0.05, 0.05, 0.5, self.car_radius),
        ]
    else:
        self.walls = []

    self.reward_type = reward_type
    self.norm_order = norm_order
    self.car_low, self.car_high = np.array(car_low), np.array(car_high)
    self.goal_low, self.goal_high = np.array(goal_low), np.array(goal_high)
    if reset_low is None:
        self.reset_low = np.array(car_low)
    else:
        self.reset_low = np.array(reset_low)
    if reset_high is None:
        self.reset_high = np.array(car_high)
    else:
        self.reset_high = np.array(reset_high)
    # shrink all position bounds by the car radius so sampled positions
    # keep the whole car inside the arena
    self.car_low += self.car_radius
    self.car_high -= self.car_radius
    self.goal_low += self.car_radius
    self.goal_high -= self.car_radius
    self.reset_low += self.car_radius
    self.reset_high -= self.car_radius

    self.two_frames = two_frames
    self.vel_in_state = vel_in_state
    self.z_in_state = z_in_state

    # x and y pos
    obs_space_low = np.copy(self.car_low)
    obs_space_high = np.copy(self.car_high)
    goal_space_low = np.copy(self.goal_low)
    goal_space_high = np.copy(self.goal_high)

    # z pos
    if self.z_in_state:
        obs_space_low = np.concatenate((obs_space_low, [-1]))
        obs_space_high = np.concatenate((obs_space_high, [0.03]))
        goal_space_low = np.concatenate((goal_space_low, [0]))
        goal_space_high = np.concatenate((goal_space_high, [0]))

    # sin and cos
    obs_space_low = np.concatenate((obs_space_low, [-1, -1]))
    obs_space_high = np.concatenate((obs_space_high, [1, 1]))
    goal_space_low = np.concatenate((goal_space_low, [-1, -1]))
    goal_space_high = np.concatenate((goal_space_high, [1, 1]))

    if self.vel_in_state:
        # x and y vel
        obs_space_low = np.concatenate((obs_space_low, [-10, -10]))
        obs_space_high = np.concatenate((obs_space_high, [10, 10]))
        goal_space_low = np.concatenate((goal_space_low, [0, 0]))
        goal_space_high = np.concatenate((goal_space_high, [0, 0]))
        # z vel
        if self.z_in_state:
            obs_space_low = np.concatenate((obs_space_low, [-10]))
            obs_space_high = np.concatenate((obs_space_high, [10]))
            goal_space_low = np.concatenate((goal_space_low, [0]))
            goal_space_high = np.concatenate((goal_space_high, [0]))
        # theta vel
        obs_space_low = np.concatenate((obs_space_low, [-10]))
        obs_space_high = np.concatenate((obs_space_high, [10]))
        goal_space_low = np.concatenate((goal_space_low, [0]))
        goal_space_high = np.concatenate((goal_space_high, [0]))

    self.obs_space = Box(obs_space_low, obs_space_high, dtype=np.float32)
    self.goal_space = Box(goal_space_low, goal_space_high, dtype=np.float32)
    if self.two_frames:
        self.obs_space = concatenate_box_spaces(self.obs_space, self.obs_space)
        self.goal_space = concatenate_box_spaces(self.goal_space, self.goal_space)
    self.observation_space = Dict([
        ('observation', self.obs_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.obs_space),
        ('state_observation', self.obs_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.obs_space),
        ('proprio_observation', self.obs_space),
        ('proprio_desired_goal', self.goal_space),
        ('proprio_achieved_goal', self.obs_space),
    ])
    self._state_goal = None
    self._cur_obs = None
    self._prev_obs = None
    self.reset()
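# `two_frames` stacks two consecutive frames, so both spaces are doubled.
# `concatenate_box_spaces` is assumed to concatenate the bounds of two Box
# spaces; a minimal sketch of such a helper (an assumption, not necessarily
# the library's actual implementation):
#
#     def concatenate_box_spaces(space1, space2):
#         return Box(np.concatenate((space1.low, space2.low)),
#                    np.concatenate((space1.high, space2.high)),
#                    dtype=np.float32)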
def __init__(self,
             puck_low=(-.4, .2),
             puck_high=(.4, 1),
             reward_type='state_distance',
             norm_order=1,
             indicator_threshold=0.06,
             hand_low=(-0.28, 0.3, 0.05),
             hand_high=(0.28, 0.9, 0.3),
             fix_goal=False,
             fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
             goal_low=(-0.25, 0.3, 0.02, -.2, .4, -.2, .4),
             goal_high=(0.25, 0.875, 0.02, .2, .8, .2, .8),
             hide_goal_markers=False,
             init_puck_z=0.02,
             num_resets_before_puck_reset=1,
             num_resets_before_hand_reset=1,
             always_start_on_same_side=True,
             goal_always_on_same_side=True,
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          model_name=self.model_name,
                          **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]),
                            np.array([1, 1, 1]),
                            dtype=np.float32)
    self.hand_and_two_puck_space = Box(
        np.hstack((self.hand_low, puck_low, puck_low)),
        np.hstack((self.hand_high, puck_high, puck_high)),
        dtype=np.float32)
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_two_puck_space),
        ('desired_goal', self.hand_and_two_puck_space),
        ('achieved_goal', self.hand_and_two_puck_space),
        ('state_observation', self.hand_and_two_puck_space),
        ('state_desired_goal', self.hand_and_two_puck_space),
        ('state_achieved_goal', self.hand_and_two_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self.reset_counter = 0
    self.num_resets_before_puck_reset = num_resets_before_puck_reset
    self.num_resets_before_hand_reset = num_resets_before_hand_reset
    self._always_start_on_same_side = always_start_on_same_side
    self._goal_always_on_same_side = goal_always_on_same_side
    self._set_puck_xys(self._sample_puck_xys())
def __init__(self,
             reward_type='puck_distance',
             norm_order=2,
             indicator_threshold=0.06,
             fix_goal=False,
             fixed_goal=(0.0, 0.6, 0.02, -0.15, 0.6),
             puck_low=None,
             puck_high=None,
             hand_low=(-0.25, 0.3, 0.02),
             hand_high=(0.25, .8, .02),
             goal_low=(-0.25, 0.3),
             goal_high=(0.25, 0.8),
             hide_goal_markers=False,
             init_puck_z=0.02,
             reset_free=False,
             init_puck_pos=(0.0, .65),
             mode='train',
             **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self,
                          hand_low=hand_low,
                          hand_high=hand_high,
                          model_name=self.model_name,
                          **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.array(puck_low)
    if goal_high is None:
        goal_high = np.array(puck_high)
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
    )
    self.puck_space = Box(puck_low, puck_high)
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.puck_space),
        ('achieved_goal', self.puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.puck_space),
        ('state_achieved_goal', self.puck_space),
    ])
    self.init_puck_z = init_puck_z
    self.reset_free = reset_free
    self.puck_pos = self.get_puck_pos()[:2]
    self.mode(mode)
    self.init_puck_pos = np.array(init_puck_pos)
def __init__(
        self,
        obj_low=(-0.1, 0.35, 0.05),
        obj_high=(0.1, 0.55, 0.05),
        reward_type='dense',
        indicator_threshold=0.06,
        random_init=True,
        hand_low=(-0.7, 0.30, 0.05),
        hand_high=(0.7, 1, 0.5),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        num_beads=7,
        init_pos=(0., 0.4, 0.02),
        substeps=50,
        log_video=False,
        video_substeps=5,
        video_h=500,
        video_w=500,
        camera_name='leftcam',
        sparse=False,
        action_penalty_const=1e-2,
        rotMode='fixed',
        **kwargs):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    # Generate the rope model XML; note the side effect of saving a copy
    # to the user's home directory.
    model = rope(num_beads=num_beads, init_pos=init_pos, texture=True)
    import os
    model.save(os.path.expanduser('~/sawyer_rope.xml'))
    with model.asfile() as f:
        SawyerXYZEnv.__init__(self, model_name=f.name, frame_skip=5, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    self.reset_free = reset_free

    # sim params
    self.num_beads = num_beads
    self.init_pos = init_pos
    self.substeps = substeps  # number of intermediate positions to generate

    # reward params
    self.sparse = sparse
    self.action_penalty_const = action_penalty_const

    # video params
    self.log_video = log_video
    self.video_substeps = video_substeps
    self.video_h = video_h
    self.video_w = video_w
    self.camera_name = camera_name

    self._state_goal = np.asarray([1.0, 0.0])
    self.rotMode = rotMode
    self.random_init = random_init
    if rotMode == 'fixed':
        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
    elif rotMode == 'rotz':
        self.action_rot_scale = 1. / 10
        self.action_space = Box(
            np.array([-1, -1, -1, -1, -1]),
            np.array([1, 1, 1, 1, 1]),
        )
    elif rotMode == 'quat':
        self.action_space = Box(
            np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
            np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
        )
    else:
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
            np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
        )
    self.obj_and_goal_space = Box(np.array(obj_low), np.array(obj_high), dtype=np.float32)
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Box(
        np.hstack((self.hand_low, list(obj_low) * self.num_beads)),
        np.hstack((self.hand_high, list(obj_high) * self.num_beads)),
        dtype=np.float32)
    self.hand_reset_pos = np.array([0, .6, .2])
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        hand_low=(-0.1, 0.55, 0.05),
        hand_high=(0.05, 0.65, 0.2),
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        frame_skip=50,
        obj_init_positions=((0, 0.6, 0.02),),
        random_init=False,
        num_objects=1,
        fix_goal=False,
        mocap_low=(-0.13, 0.57, 0.04),
        mocap_high=(0.08, 0.73, 0.2),
        action_scale=0.02,
        fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        filename='sawyer_pick_and_place_multiobj.xml',
        object_mass=1,
        # e.g. object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl']
        object_meshes=None,
        obj_classname=None,
        block_height=0.04,
        block_width=0.04,
        cylinder_radius=0.015,
        finger_sensors=False,
        maxlen=0.06,
        minlen=0.01,
        preload_obj_dict=None,
        hide_goal_markers=False,
        oracle_reset_prob=0.0,
        presampled_goals=None,
        num_goals_presampled=2,
        p_obj_in_hand=1,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    base_filename = asset_base_path + filename
    friction_params = (1, 1, 2)
    self.obj_stat_prop = create_object_xml(base_filename, num_objects,
                                           object_mass, friction_params,
                                           object_meshes, finger_sensors,
                                           maxlen, minlen, preload_obj_dict,
                                           obj_classname, block_height,
                                           block_width, cylinder_radius)
    gen_xml = create_root_xml(base_filename)
    MujocoEnv.__init__(self, gen_xml, frame_skip=frame_skip)
    self.reset_mocap_welds()
    clean_xml(gen_xml)
    self.mocap_low = mocap_low
    self.mocap_high = mocap_high
    self.action_scale = action_scale
    self.hand_low = np.array(hand_low)
    self.hand_high = np.array(hand_high)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.random_init = random_init
    self.p_obj_in_hand = p_obj_in_hand
    self.indicator_threshold = indicator_threshold
    self.obj_init_z = obj_init_positions[0][2]
    self.obj_init_positions = np.array(obj_init_positions)
    self.last_obj_pos = self.obj_init_positions[0]
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.reset_free = reset_free
    self.oracle_reset_prob = oracle_reset_prob
    self.obj_ind_to_manip = 0
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    self.gripper_and_hand_and_obj_space = Box(
        np.hstack(([0.0], self.hand_low, obj_low)),
        np.hstack(([0.04], self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.num_objects = num_objects
    self.maxlen = maxlen
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.hand_reset_pos = np.array([0, .6, .2])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        # fixed: was `.values` without call parentheses, which raised at runtime
        self.num_goals_presampled = len(list(self._presampled_goals.values())[0])
    else:
        # presampled_goals will be created when sample_goal is first called
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.num_goals_presampled = 10  # NOTE: unconditionally overrides the values set above
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.reset()
def sample_goal(self):
    return MultitaskEnv.sample_goal(self)
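# Delegating to MultitaskEnv.sample_goal keeps the single-goal API
# consistent with the batch API: the base implementation typically draws
# sample_goals(1) and unpacks the first goal dict (hedged; exact behavior
# depends on the MultitaskEnv base class). Illustrative use:
#
#     goal = env.sample_goal()            # e.g. {'desired_goal': ..., 'state_desired_goal': ...}
#     goals = env.sample_goals(batch_size=8)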