def __init__(
        self,
        reward_type='hand_distance',
        norm_order=1,
        indicator_threshold=0.06,
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.3),
        hide_goal_markers=False,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_space),
        ('desired_goal', self.hand_space),
        ('achieved_goal', self.hand_space),
        ('state_observation', self.hand_space),
        ('state_desired_goal', self.hand_space),
        ('state_achieved_goal', self.hand_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
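# Hedged usage sketch (an assumption added for illustration, not part of the
# original file). Every constructor in this file builds a gym.spaces.Dict
# keyed by observation / desired_goal / achieved_goal plus state_* and
# proprio_* variants; this self-contained snippet shows how such a space
# behaves. It is commented out so the module has no import-time side effects.
#
#     import numpy as np
#     from gym.spaces import Box, Dict
#
#     hand_space = Box(np.array([-0.2, 0.5, 0.0]), np.array([0.2, 0.7, 0.3]),
#                      dtype=np.float32)
#     obs_space = Dict([
#         ('observation', hand_space),
#         ('desired_goal', hand_space),
#         ('achieved_goal', hand_space),
#     ])
#     sample = obs_space.sample()   # dict of arrays, one entry per key
#     assert obs_space.contains(sample)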
def __init__(
        self,
        goal_low=(-.25, .3, .12, -1.5708),
        goal_high=(.25, .6, .12, 0),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(0, .45, .12, -.25),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.25, 0.3, .12),
        hand_high=(0.25, 0.6, .12),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=-1.5708,
        max_angle=0,
        xml_path='sawyer_xyz/sawyer_door_pull.xml',
        **sawyer_xyz_kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(
        self,
        self.model_name,
        hand_low=hand_low,
        hand_high=hand_high,
        **sawyer_xyz_kwargs
    )
    MultitaskEnv.__init__(self)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(np.array(goal_low), np.array(goal_high))
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(np.array([-1, -1]), np.array([1, 1]))
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.state_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.state_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    self.reset()
def __init__(
        self,
        puck_low=None,
        puck_high=None,
        reward_type='hand_and_puck_distance',
        indicator_threshold=0.06,
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
        goal_low=None,
        goal_high=None,
        hide_goal_markers=False,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    goal_low = np.array(goal_low)
    goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
    )
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = self.get_puck_pos()[2]
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_pos=(0, 0.65, 0.03),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
        goal_low=None,
        goal_high=None,
        hide_goal_markers=False,
        hide_arm=False,
        num_objects=1,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    self.hide_arm = hide_arm
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    self.num_objects = num_objects
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, np.tile(obj_low, num_objects)))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, np.tile(obj_high, num_objects)))
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, np.tile(obj_low, num_objects))),
        np.hstack((self.hand_high, np.tile(obj_high, num_objects))),
    )
    self.observation_space = Dict([
        ('observation', self.hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_pos=(0, 0.6, 0.02),
        fix_goal=True,
        # 3D placing goal, followed by the height target for picking.
        fixed_goal=(0, 0.85, 0.12, 0.1),
        goal_low=None,
        goal_high=None,
        hide_goal_markers=False,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.max_path_length = 200
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.obj_init_pos = np.array(obj_init_pos)
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.observation_space = Dict([
        ('observation', self.hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
    ])
    self.reset()
def __init__(
        self,
        doorGrasp_low=None,
        doorGrasp_high=None,
        tasks=[{'goalAngle': [1.0472], 'door_init_pos': [0, 1, 0.3]}],
        goal_low=np.array([0]),
        goal_high=np.array([1.58825]),
        hand_init_pos=(0, 0.4, 0.1),
        fixed_door_pos=(0, 1.0, 0.3),
        image=False,
        doorHalfWidth=0.2,
        mpl=100,
        **kwargs
):
    self.quick_init(locals())
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if doorGrasp_low is None:
        doorGrasp_low = self.hand_low
    if doorGrasp_high is None:
        doorGrasp_high = self.hand_high
    self.max_path_length = mpl
    self.doorHalfWidth = doorHalfWidth
    # Use the constructor arguments rather than re-hardcoding their defaults.
    self.fixed_door_pos = np.array(fixed_door_pos)
    self.hand_init_pos = np.array(hand_init_pos)
    self.info_logKeys = ['angleDelta', 'doorAngle', 'doorAngleTarget']
    self.tasks = np.array(tasks)
    self.num_tasks = len(tasks)
    self.action_space = Box(
        np.array([-1, -1, -1]),
        np.array([1, 1, 1]),
        dtype=np.float32,
    )
    self.hand_and_door_space = Box(
        np.hstack((self.hand_low, doorGrasp_low)),
        np.hstack((self.hand_high, doorGrasp_high)),
        dtype=np.float32,
    )
    self.goal_space = Box(goal_low, goal_high)
    self.observation_space = Dict([
        ('state_observation', self.hand_and_door_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ])
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        obj_init_pos=(0, 0.6, 0.02),
        goals=[[0, 0.7, 0.02]],
        goal_low=None,
        goal_high=None,
        hand_init_pos=(0, 0.4, 0.25),
        blockSize=0.02,
        **kwargs
):
    self.quick_init(locals())
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if goal_low is None:
        goal_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    self.max_path_length = 150
    self.goals = goals
    self.num_goals = len(goals)
    self.obj_init_pos = np.array(obj_init_pos)
    self.hand_init_pos = np.array(hand_init_pos)
    self.blockSize = blockSize
    self.action_space = Box(
        np.array([-1, -1, -1, -1, -1]),
        np.array([1, 1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.goal_space = Box(goal_low, goal_high)
    self.observation_space = Dict([
        ('state_observation', self.hand_and_obj_space),
        ('desired_goal', self.goal_space),
    ])
    self.reset()
def __init__(
        self,
        puck_low=(-.35, .25),
        puck_high=(.35, .95),
        norm_order=1,
        indicator_threshold=0.06,
        # Chosen empirically after a few runs and a look at the histogram.
        touch_threshold=0.1,
        hand_low=(-0.28, 0.3, 0.05),
        hand_high=(0.28, 0.9, 0.3),
        fix_goal=True,
        fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
        goal_low=(-0.25, 0.3, 0.02, -0.2, .4),
        goal_high=(0.25, 0.875, 0.02, .2, .8),
        init_puck_z=0.035,
        init_hand_xyz=(0, 0.4, 0.07),
        reset_free=False,
        xml_path='sawyer_xyz/sawyer_push_puck.xml',
        clamp_puck_on_step=False,
        puck_radius=.07,
        **kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    # Optional overrides are popped from kwargs before they reach SawyerXYZEnv.
    self.goal_type = kwargs.pop('goal_type', 'puck')
    self.dense_reward = kwargs.pop('dense_reward', False)
    self.indicator_threshold = kwargs.pop('goal_tolerance', indicator_threshold)
    self.fixed_goal = np.array(kwargs.pop('goal', fixed_goal))
    self.task_agnostic = kwargs.pop('task_agnostic', False)
    self.init_puck_xy = np.array(kwargs.pop('init_puck_xy', [0, 0.6]))
    self.init_hand_xyz = np.array(kwargs.pop('init_hand_xyz', init_hand_xyz))
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    self.puck_low = np.array(puck_low)
    self.puck_high = np.array(puck_high)
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.norm_order = norm_order
    self.touch_threshold = touch_threshold
    self.fix_goal = fix_goal
    self._state_goal = None
    self.hide_goal_markers = self.task_agnostic
    self.action_space = Box(
        np.array([-1, -1, -1]),
        np.array([1, 1, 1]),
        dtype=np.float32,
    )
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
        dtype=np.float32,
    )
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self._set_puck_xy(self.sample_puck_xy())
    self.reset_free = reset_free
    self.reset_counter = 0
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.clamp_puck_on_step = clamp_puck_on_step
    self.puck_radius = puck_radius
    self.reset()
def __init__(
        self,
        block_low=(-0.2, 0.55, 0.02),
        block_high=(0.2, 0.75, 0.02),
        hand_low=(0.0, 0.55, 0.3),
        hand_high=(0.0, 0.55, 0.3),
        stack_goal_low=(-0.2, 0.55, 0.02),
        stack_goal_high=(0.2, 0.75, 0.02),
        fix_goal=False,
        fixed_stack_goal=(0.0, 0.55, 0.02),
        fixed_hand_goal=(0.0, 0.75, 0.3),
        use_sparse_reward=False,
        sparse_reward_threshold=0.05,
        reset_free=False,
        hide_goal_markers=False,
        oracle_reset_prob=0.0,
        xml_path='sawyer_xyz/three_blocks.xml',
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(self, model_name=get_asset_full_path(xml_path), **kwargs)
    self.block_low = np.array(block_low)
    self.block_high = np.array(block_high)
    self.block_radius = 0.02
    self.hand_low = np.array(hand_low)
    self.hand_high = np.array(hand_high)
    self.stack_goal_low = np.array(stack_goal_low)
    self.stack_goal_high = np.array(stack_goal_high)
    self.max_place_distance = max(
        np.linalg.norm((self.stack_goal_high - self.block_low), ord=2),
        np.linalg.norm((self.block_high - self.stack_goal_low), ord=2),
    )
    self.fix_goal = fix_goal
    self.fixed_stack_goal = np.array(fixed_stack_goal)
    self.use_sparse_reward = use_sparse_reward
    self.sparse_reward_threshold = sparse_reward_threshold
    self.reset_free = reset_free
    self.hide_goal_markers = hide_goal_markers
    self.oracle_reset_prob = oracle_reset_prob
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32,
    )
    # Split the block x-range into three disjoint thirds, one per block,
    # so sampled initial positions cannot overlap.
    diff = (self.block_high[0] - self.block_low[0]) / 3.0
    midpoint = diff + self.block_low[0]
    block_one_high = [midpoint, *self.block_high[1:]]
    block_two_low = [midpoint, *self.block_low[1:]]
    midpoint = midpoint + diff
    block_two_high = [midpoint, *self.block_high[1:]]
    block_three_low = [midpoint, *self.block_low[1:]]
    self.sampling_space = Box(
        np.hstack(([0.0], self.hand_low, self.block_low, block_two_low, block_three_low)),
        np.hstack(([0.04], self.hand_high, block_one_high, block_two_high, self.block_high)),
        dtype=np.float32,
    )
    self.gripper_and_hand_and_blocks_space = Box(
        np.hstack(([0.0], self.hand_low, self.block_low, self.block_low, self.block_low)),
        np.hstack(([0.04], self.hand_high, self.block_high, self.block_high, self.block_high)),
        dtype=np.float32,
    )
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_blocks_space),
        ('desired_goal', self.gripper_and_hand_and_blocks_space),
        ('achieved_goal', self.gripper_and_hand_and_blocks_space),
        ('state_observation', self.gripper_and_hand_and_blocks_space),
        ('state_desired_goal', self.gripper_and_hand_and_blocks_space),
        ('state_achieved_goal', self.gripper_and_hand_and_blocks_space),
        ('proprio_observation', self.gripper_and_hand_and_blocks_space),
        ('proprio_desired_goal', self.gripper_and_hand_and_blocks_space),
        ('proprio_achieved_goal', self.gripper_and_hand_and_blocks_space),
    ])
    self.block_one_id = self.model.get_joint_qpos_addr("blockOneJoint")
    self.block_two_id = self.model.get_joint_qpos_addr("blockTwoJoint")
    self.block_three_id = self.model.get_joint_qpos_addr("blockThreeJoint")
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        # Note: obj_init_pos and door_pos are assumed to be module-level
        # defaults; they are not defined inside this constructor.
        tasks=[{
            'task': 'drawer',
            'obj_init_pos': obj_init_pos,
            'padded_target_pos': np.array([0.1, 0, 0]),
            'door_pos': door_pos,
        }],
        pushRew_weight=1,
        goal_low=np.array([-0.5, 0.4, 0.05, None]),
        goal_high=np.array([0.5, 1., 0.5, None]),
        hand_init_pos=(0, 0.4, 0.1),
        doorHalfWidth=0.2,
        rewMode='posPlace',
        indicatorDist=0.05,
        image=False,
        image_dim=84,
        camera_name='angled_cam',
        mpl=150,
        hide_goal=True,
        change_task_every_episode=False,
        **kwargs
):
    self.quick_init(locals())
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    self.sparse = False
    self.pushRew_weight = pushRew_weight
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    if goal_low is None:
        goal_low = self.hand_low
    # The None checks were dropped upstream; the grasp bounds always
    # default to the hand bounds.
    # if doorGrasp_low is None:
    door_grasp_low = self.hand_low
    # if doorGrasp_high is None:
    door_grasp_high = self.hand_high
    self.push_task_id = 0
    self.door_task_id = 1
    self.drawer_task_id = 2
    self.doorHalfWidth = doorHalfWidth
    self.camera_name = camera_name
    # Assumes the object's body is the last body in the XML.
    self.objHeight = self.model.body_pos[-1][2]
    self.max_path_length = mpl
    self.image = image
    self.image_dim = image_dim
    self.tasks = np.array(tasks)
    self.change_task_every_episode = change_task_every_episode
    self.num_tasks = len(tasks)
    self.rewMode = rewMode
    self.Ind = indicatorDist
    self.hand_init_pos = np.array(hand_init_pos)
    self.action_space = Box(
        np.array([-1, -1, -1]),
        np.array([1, 1, 1]),
    )
    drawer_grasp_low = door_grasp_low
    drawer_grasp_high = door_grasp_high
    self.hand_env_space = Box(
        np.hstack((self.hand_low, obj_low, door_grasp_low, drawer_grasp_low)),
        np.hstack((self.hand_high, obj_high, door_grasp_high, drawer_grasp_high)),
    )
    self.goal_space = Box(goal_low, goal_high)
    self.push_info_logKeys = ['placeDist']
    self.door_info_logKeys = ['angleDelta']
    self.drawer_info_logKeys = ['posDelta']
    self.hide_goal = hide_goal
    if self.image:
        self.set_image_obsSpace()
    else:
        self.set_state_obsSpace()
def __init__(
        self,
        hand_low=(-0.5, 0.40, 0.05),
        hand_high=(0.5, 1, 0.5),
        obj_low=None,
        obj_high=None,
        random_init=False,
        tasks=[{
            'goal': np.array([0.1, 0.8, 0.2]),
            'obj_init_pos': np.array([0, 0.6, 0.02]),
            'obj_init_angle': 0.3,
        }],
        goal_low=None,
        goal_high=None,
        hand_init_pos=(0, 0.6, 0.2),
        liftThresh=0.04,
        rewMode='orig',
        rotMode='rotz',
        **kwargs
):
    self.quick_init(locals())
    SawyerXYZEnv.__init__(
        self,
        frame_skip=5,
        action_scale=1./100,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if obj_low is None:
        obj_low = self.hand_low
    if goal_low is None:
        goal_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    self.random_init = random_init
    self.liftThresh = liftThresh
    self.max_path_length = 200
    self.tasks = tasks
    self.num_tasks = len(tasks)
    self.rewMode = rewMode
    self.rotMode = rotMode
    self.hand_init_pos = np.array(hand_init_pos)
    if rotMode == 'fixed':
        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
    elif rotMode == 'rotz':
        self.action_rot_scale = 1./50
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi, -1]),
            np.array([1, 1, 1, np.pi, 1]),
        )
    elif rotMode == 'quat':
        self.action_space = Box(
            np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
            np.array([1, 1, 1, 2*np.pi, 1, 1, 1, 1]),
        )
    else:
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi/2, -np.pi/2, 0, -1]),
            np.array([1, 1, 1, np.pi/2, np.pi/2, np.pi*2, 1]),
        )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.goal_space = Box(goal_low, goal_high)
    self.observation_space = Box(
        np.hstack((self.hand_low, obj_low, obj_low)),
        np.hstack((self.hand_high, obj_high, obj_high)),
    )
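# Reference for the rotMode branches used above and in several constructors
# below, read directly off the Box bounds each branch defines:
#   'fixed' -> 4-dim action (dx, dy, dz, gripper)
#   'rotz'  -> 5-dim action (dx, dy, dz, z-rotation, gripper)
#   'quat'  -> 8-dim action (dx, dy, dz, angle in [0, 2*pi], four more
#              rotation components, gripper)
#   other   -> 7-dim action (dx, dy, dz, three Euler-style angles, gripper)
# Only the dimensionalities are grounded in this file; the exact semantics
# of the rotation entries are an assumption noted here for orientation.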
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.02,
        fix_goal=False,
        fixed_goal=None,
        goal_low=None,
        goal_high=None,
        hard_goals=False,
        hide_goal_markers=True,
        presampled_goals=None,
        num_goals_presampled=1000,
        norm_order=2,
        structure='2d',
        two_obj=False,
        frame_skip=100,
        reset_p=None,
        goal_p=None,
        fixed_reset=None,
        hide_state_markers=True,
        test_mode_case_num=None,
        **kwargs
):
    self.quick_init(locals())
    # self.obj_id[0] is the name suffix for the first object,
    # self.obj_id[1] for the second.
    self.obj_id = {
        0: '',
        1: '2',
    }
    assert structure in ['2d', '3d', '2d_wall_short', '2d_wall_tall', '2d_wall_tall_dark']
    self.structure = structure
    self.two_obj = two_obj
    self.hard_goals = hard_goals
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        model_name=self.model_name,
        frame_skip=frame_skip,
        **kwargs
    )
    self.norm_order = norm_order
    if obj_low is None:
        obj_low = np.copy(self.hand_low)
        obj_low[2] = 0.0
    if obj_high is None:
        obj_high = np.copy(self.hand_high)
    self.obj_low = np.array(obj_low)
    self.obj_high = np.array(obj_high)
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.subgoals = None
    self.fix_goal = fix_goal
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.hide_state_markers = hide_state_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    if self.two_obj:
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, obj_low)),
            np.hstack((self.hand_high, obj_high, obj_high)),
            dtype=np.float32
        )
    else:
        self.hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
            dtype=np.float32
        )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    if self.two_obj:
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, obj_low, [0.0])),
            np.hstack((self.hand_high, obj_high, obj_high, [0.07])),
            dtype=np.float32
        )
    else:
        self.gripper_and_hand_and_obj_space = Box(
            np.hstack((self.hand_low, obj_low, [0.0])),
            np.hstack((self.hand_high, obj_high, [0.07])),
            dtype=np.float32
        )
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.gripper_and_hand_and_obj_space),
        ('achieved_goal', self.gripper_and_hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.gripper_and_hand_and_obj_space),
        ('state_achieved_goal', self.gripper_and_hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        self.num_goals_presampled = len(
            list(self._presampled_goals.values())[0])
    else:
        # If num_goals_presampled > 0, presampled_goals will be created
        # when sample_goal is first called.
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.obj_radius = np.array([0.018, 0.016])
    self.ee_radius = np.array([0.053, 0.058])
    if self.structure == '2d_wall_short':
        self.wall_radius = np.array([0.03, 0.02])
        self.wall_center = np.array([0.0, 0.60, 0.02])
    elif self.structure in ['2d_wall_tall', '2d_wall_tall_dark']:
        self.wall_radius = np.array([0.015, 0.04])
        self.wall_center = np.array([0.0, 0.60, 0.04])
    else:
        self.wall_radius = None
        self.wall_center = None
    if fixed_reset is not None:
        fixed_reset = np.array(fixed_reset)
    self.fixed_reset = fixed_reset
    if fixed_goal is not None:
        fixed_goal = np.array(fixed_goal)
    self.fixed_goal = fixed_goal
    self.reset_p = reset_p
    self.goal_p = goal_p
    self.test_mode_case_num = test_mode_case_num
    if self.test_mode_case_num == 1:
        self.fixed_reset = np.array([0.0, 0.50, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.50, 0.10, 0.0, 0.70, 0.03, 0.0, 0.70, 0.015])
    elif self.test_mode_case_num == 2:
        self.fixed_reset = np.array([0.0, 0.50, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.70, 0.10, 0.0, 0.50, 0.015, 0.0, 0.50, 0.03])
    elif self.test_mode_case_num == 3:
        self.fixed_reset = np.array([0.0, 0.60, 0.05, 0.0, 0.50, 0.015, 0.0, 0.70, 0.015])
        self.fixed_goal = np.array([0.0, 0.60, 0.05, 0.0, 0.70, 0.015, 0.0, 0.50, 0.015])
    if presampled_goals is not None:
        self.reset()
    else:
        # Do the initial reset with only one presampled goal, then clear the
        # cache so the full batch is sampled lazily on the first sample_goal.
        self.num_goals_presampled = 1
        self.reset()
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
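# Hedged sketch (an assumption, not the class's actual implementation) of
# the lazy goal-presampling pattern used by the constructor above: nothing
# is sampled until the first sample_goal() call, after which goals are
# served by random index from the cached batch. `sample_goals` stands in
# for the environment-specific batch sampler.
class _PresampledGoalSketch(object):
    def __init__(self, num_goals_presampled=1000):
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled

    def sample_goals(self, batch_size):
        # Stand-in: a real env samples from its goal space instead.
        return {'state_desired_goal': np.random.uniform(size=(batch_size, 3))}

    def sample_goal(self):
        # Populate the cache on first use, then draw a random cached goal.
        if self._presampled_goals is None:
            self._presampled_goals = self.sample_goals(self.num_goals_presampled)
        idx = np.random.randint(self.num_goals_presampled)
        return {k: v[idx] for k, v in self._presampled_goals.items()}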
def __init__(
        self,
        hand_low=(-0.5, 0.40, 0.07),
        hand_high=(0.5, 1, 0.5),
        obj_low=None,
        obj_high=None,
        random_init=False,
        obs_type='plain',
        tasks=[{
            'goal': np.array([0.12, 0.7, 0.02]),
            'obj_init_pos': np.array([-0.12, 0.7, 0.02]),
            'obj_init_angle': 0.3,
        }],
        goal_low=None,
        goal_high=None,
        hand_init_pos=(0, 0.6, 0.2),
        liftThresh=0.1,
        rewMode='orig',
        rotMode='fixed',
        multitask=False,
        multitask_num=1,
        if_render=False,
        **kwargs
):
    self.quick_init(locals())
    SawyerXYZEnv.__init__(
        self,
        frame_skip=5,
        action_scale=1./100,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    assert obs_type in OBS_TYPE
    if multitask:
        obs_type = 'with_goal_and_id'
    self.obs_type = obs_type
    if obj_low is None:
        obj_low = self.hand_low
    if goal_low is None:
        goal_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    self.random_init = random_init
    self.liftThresh = liftThresh
    self.max_path_length = 150
    self.tasks = tasks
    self.num_tasks = len(tasks)
    self.rewMode = rewMode
    self.rotMode = rotMode
    self.hand_init_pos = np.array(hand_init_pos)
    self.multitask = multitask
    self.multitask_num = multitask_num
    self._state_goal_idx = np.zeros(self.multitask_num)
    self.if_render = if_render
    if rotMode == 'fixed':
        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
    elif rotMode == 'quat':
        self.action_space = Box(
            np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
            np.array([1, 1, 1, 2*np.pi, 1, 1, 1, 1]),
        )
    else:
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi/2, -np.pi/2, 0, -1]),
            np.array([1, 1, 1, np.pi/2, np.pi/2, np.pi*2, 1]),
        )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.goal_and_obj_space = Box(
        np.hstack((goal_low[:2], obj_low[:2])),
        np.hstack((goal_high[:2], obj_high[:2])),
    )
    self.goal_space = Box(goal_low[:2], goal_high[:2])
    if not multitask and self.obs_type == 'with_goal_id':
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low, np.zeros(len(tasks)))),
            np.hstack((self.hand_high, obj_high, np.ones(len(tasks)))),
        )
    elif not multitask and self.obs_type == 'plain':
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
    elif not multitask and self.obs_type == 'with_goal':
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low, goal_low)),
            np.hstack((self.hand_high, obj_high, goal_high)),
        )
    else:
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low, goal_low, np.zeros(multitask_num))),
            np.hstack((self.hand_high, obj_high, goal_high, np.ones(multitask_num))),
        )
    self.reset()
def __init__(
        self,
        puck_low=(-.4, .2),
        puck_high=(.4, 1),
        reward_type='state_distance',
        norm_order=1,
        indicator_threshold=0.06,
        hand_low=(-0.28, 0.3, 0.05),
        hand_high=(0.28, 0.9, 0.3),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
        goal_low=(-0.25, 0.3, 0.02, -.2, .4),
        goal_high=(0.25, 0.875, 0.02, .2, .8),
        hide_goal_markers=False,
        init_puck_z=0.02,
        init_hand_xyz=(0, 0.4, 0.07),
        reset_free=False,
        xml_path='sawyer_xyz/sawyer_push_puck.xml',
        clamp_puck_on_step=False,
        puck_radius=.07,
        **kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1]),
        np.array([1, 1, 1]),
        dtype=np.float32,
    )
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
        dtype=np.float32,
    )
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.hand_and_puck_space),
        ('achieved_goal', self.hand_and_puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.hand_and_puck_space),
        ('state_achieved_goal', self.hand_and_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self.init_hand_xyz = np.array(init_hand_xyz)
    self._set_puck_xy(self.sample_puck_xy())
    self.reset_free = reset_free
    self.reset_counter = 0
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.clamp_puck_on_step = clamp_puck_on_step
    self.puck_radius = puck_radius
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        tasks=[{'goal': [0, 0.7, 0.02], 'obj_init_pos': [0, 0.6, 0.02]}],
        goal_low=None,
        goal_high=None,
        hand_init_pos=(0, 0.4, 0.05),
        rewMode='posPlace',
        indicatorDist=0.05,
        image=False,
        image_dim=84,
        camera_name='robotview_zoomed',
        mpl=150,
        hide_goal=True,
        hand_type='parallel_v1',
        n_tasks=15,
        **kwargs
):
    self.quick_init(locals())
    self.hand_type = hand_type
    SawyerXYZEnv.__init__(
        self,
        hand_type=self.hand_type,
        model_name=self.model_name,
        **kwargs
    )
    if obj_low is None:
        obj_low = self.hand_low
    if goal_low is None:
        goal_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    self.camera_name = camera_name
    self.max_path_length = mpl
    self.image = image
    self.image_dim = image_dim
    # `tasks` serves as a template; the actual task set is sampled, so
    # count the sampled tasks.
    self.task_temp = np.array(tasks)
    self.tasks = self.sample_tasks(n_tasks)
    self.num_tasks = len(self.tasks)
    self.rewMode = rewMode
    self.Ind = indicatorDist
    self.hand_init_pos = np.array(hand_init_pos)
    self.action_space = Box(
        np.array([-1, -1, -1]),
        np.array([1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.goal_space = Box(goal_low, goal_high)
    self.info_logKeys = ['placeDist']
    self.hide_goal = hide_goal
    if self.image:
        self.set_image_obsSpace()
    else:
        self.set_state_obsSpace()
def __init__(
        self,
        obj_low=(-0.1, 0.35, 0.05),
        obj_high=(0.1, 0.55, 0.05),
        reward_type='dense',
        indicator_threshold=0.06,
        random_init=True,
        hand_low=(-0.7, 0.30, 0.05),
        hand_high=(0.7, 1, 0.5),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        num_beads=7,
        init_pos=(0., 0.4, 0.02),
        substeps=50,
        log_video=False,
        video_substeps=5,
        video_h=500,
        video_w=500,
        camera_name='leftcam',
        sparse=False,
        action_penalty_const=1e-2,
        rotMode='fixed',
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    model = rope(num_beads=num_beads, init_pos=init_pos, texture=True)
    import os
    model.save(os.path.expanduser('~/sawyer_rope.xml'))
    with model.asfile() as f:
        SawyerXYZEnv.__init__(self, model_name=f.name, frame_skip=5, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    self.reset_free = reset_free
    # sim params
    self.num_beads = num_beads
    self.init_pos = init_pos
    self.substeps = substeps  # number of intermediate positions to generate
    # reward params
    self.sparse = sparse
    self.action_penalty_const = action_penalty_const
    # video params
    self.log_video = log_video
    self.video_substeps = video_substeps
    self.video_h = video_h
    self.video_w = video_w
    self.camera_name = camera_name
    self._state_goal = np.asarray([1.0, 0.0])
    self.rotMode = rotMode
    self.random_init = random_init
    if rotMode == 'fixed':
        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
    elif rotMode == 'rotz':
        self.action_rot_scale = 1. / 10
        self.action_space = Box(
            np.array([-1, -1, -1, -1, -1]),
            np.array([1, 1, 1, 1, 1]),
        )
    elif rotMode == 'quat':
        self.action_space = Box(
            np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
            np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
        )
    else:
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
            np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
        )
    self.obj_and_goal_space = Box(
        np.array(obj_low),
        np.array(obj_high),
        dtype=np.float32,
    )
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.observation_space = Box(
        np.hstack((self.hand_low, list(obj_low) * self.num_beads)),
        np.hstack((self.hand_high, list(obj_high) * self.num_beads)),
        dtype=np.float32,
    )
    self.hand_reset_pos = np.array([0, .6, .2])
    self.reset()
def __init__(
        self,
        puck_low=(-.4, .2),
        puck_high=(.4, 1),
        reward_type='state_distance',
        norm_order=1,
        indicator_threshold=0.06,
        hand_low=(-0.28, 0.3, 0.05),
        hand_high=(0.28, 0.9, 0.3),
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.02, -0.15, 0.6),
        goal_low=(-0.25, 0.3, 0.02, -.2, .4, -.2, .4),
        goal_high=(0.25, 0.875, 0.02, .2, .8, .2, .8),
        hide_goal_markers=False,
        init_puck_z=0.02,
        num_resets_before_puck_reset=1,
        num_resets_before_hand_reset=1,
        always_start_on_same_side=True,
        goal_always_on_same_side=True,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, puck_low, puck_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, puck_high, puck_high))
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1]),
        np.array([1, 1, 1]),
        dtype=np.float32,
    )
    self.hand_and_two_puck_space = Box(
        np.hstack((self.hand_low, puck_low, puck_low)),
        np.hstack((self.hand_high, puck_high, puck_high)),
        dtype=np.float32,
    )
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    self.puck_space = Box(self.puck_low, self.puck_high, dtype=np.float32)
    self.observation_space = Dict([
        ('observation', self.hand_and_two_puck_space),
        ('desired_goal', self.hand_and_two_puck_space),
        ('achieved_goal', self.hand_and_two_puck_space),
        ('state_observation', self.hand_and_two_puck_space),
        ('state_desired_goal', self.hand_and_two_puck_space),
        ('state_achieved_goal', self.hand_and_two_puck_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.init_puck_z = init_puck_z
    self.reset_counter = 0
    self.num_resets_before_puck_reset = num_resets_before_puck_reset
    self.num_resets_before_hand_reset = num_resets_before_hand_reset
    self._always_start_on_same_side = always_start_on_same_side
    self._goal_always_on_same_side = goal_always_on_same_side
    self._set_puck_xys(self._sample_puck_xys())
def __init__(
        self,
        hand_low=(-0.5, 0.40, 0.05),
        hand_high=(0.5, 1, 0.5),
        obj_low=(-0.1, 0.6, 0.02),
        obj_high=(0.1, 0.7, 0.02),
        random_init=False,
        tasks=[{
            'goal': np.array([-0.1, 0.8, 0.2]),
            'obj_init_pos': np.array([0, 0.6, 0.02]),
            'obj_init_angle': 0.3,
        }],
        goal_low=(-0.1, 0.8, 0.05),
        goal_high=(0.1, 0.9, 0.3),
        hand_init_pos=(0, 0.6, 0.2),
        sampleMode='equal',
        rewMode='orig',
        rotMode='fixed',
        multitask=False,
        multitask_num=1,
        **kwargs
):
    self.quick_init(locals())
    SawyerXYZEnv.__init__(
        self,
        frame_skip=5,
        action_scale=1./100,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if obj_low is None:
        obj_low = self.hand_low
    if goal_low is None:
        goal_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    self.random_init = random_init
    self.max_path_length = 150
    self.tasks = tasks
    self.num_tasks = len(tasks)
    self.rewMode = rewMode
    self.rotMode = rotMode
    self.sampleMode = sampleMode
    self.hand_init_pos = np.array(hand_init_pos)
    self.multitask = multitask
    if rotMode == 'fixed':
        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
    elif rotMode == 'rotz':
        self.action_rot_scale = 1./50
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi, -1]),
            np.array([1, 1, 1, np.pi, 1]),
        )
    elif rotMode == 'quat':
        self.action_space = Box(
            np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
            np.array([1, 1, 1, 2*np.pi, 1, 1, 1, 1]),
        )
    else:
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi/2, -np.pi/2, 0, -1]),
            np.array([1, 1, 1, np.pi/2, np.pi/2, np.pi*2, 1]),
        )
    self.obj_and_goal_space = Box(
        np.hstack((obj_low, goal_low)),
        np.hstack((obj_high, goal_high)),
    )
    self.goal_space = Box(np.array(goal_low), np.array(goal_high))
    if not multitask:
        self.observation_space = Box(
            np.hstack((self.hand_low, goal_low)),
            np.hstack((self.hand_high, goal_high)),
        )
    else:
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low, np.zeros(multitask_num))),
            np.hstack((self.hand_high, obj_high, np.zeros(multitask_num))),
        )
    self.num_resets = 0
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        tasks=[{
            'goal': np.array([0, 0.7, 0.02]),
            'height': 0.06,
            'obj_init_pos': np.array([0, 0.6, 0.02]),
        }],
        goal_low=None,
        goal_high=None,
        hand_init_pos=(0, 0.4, 0.05),
        liftThresh=0.04,
        rewMode='orig',
        objType='block',
        **kwargs
):
    self.objType = objType
    self.quick_init(locals())
    SawyerXYZEnv.__init__(self, model_name=self.model_name, **kwargs)
    if obj_low is None:
        obj_low = self.hand_low
    if goal_low is None:
        goal_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    # Make sure that objectGeom is the last geom in the XML.
    self.objHeight = self.model.geom_pos[-1][2]
    self.heightTarget = self.objHeight + liftThresh
    self.max_path_length = 150
    self.tasks = tasks
    self.num_tasks = len(tasks)
    self.rewMode = rewMode
    self.hand_init_pos = np.array(hand_init_pos)
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.goal_space = Box(goal_low, goal_high)
    self.observation_space = Dict([
        ('state_observation', self.hand_and_obj_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ])
def __init__(
        self,
        hand_low=(-0.5, 0.40, 0.05),
        hand_high=(0.5, 1, 0.5),
        obj_low=(-0.1, 0.7, 0.16),
        obj_high=(0.1, 0.9, 0.16),
        random_init=False,
        tasks=[{
            'goal': np.array([0.08, 0.785, 0.15]),
            'obj_init_pos': np.array([-0.1, 0.785, 0.15]),
            'obj_init_angle': 0.3,
        }],
        goal_low=None,
        goal_high=None,
        obs_type='plain',
        hand_init_pos=(0, 0.6, 0.2),
        liftThresh=0.02,
        rewMode='orig',
        rotMode='fixed',
        multitask=False,
        multitask_num=1,
        if_render=False,
        **kwargs
):
    self.quick_init(locals())
    SawyerXYZEnv.__init__(
        self,
        frame_skip=5,
        action_scale=1. / 100,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    assert obs_type in OBS_TYPE
    if multitask:
        obs_type = 'with_goal_and_id'
    self.obs_type = obs_type
    if obj_low is None:
        obj_low = self.hand_low
    if goal_low is None:
        goal_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    self.random_init = random_init
    self.max_path_length = 150
    self.tasks = tasks
    self.num_tasks = len(tasks)
    self.rewMode = rewMode
    self.rotMode = rotMode
    self.hand_init_pos = np.array(hand_init_pos)
    self.multitask = multitask
    self.multitask_num = multitask_num
    self._state_goal_idx = np.zeros(self.multitask_num)
    self.if_render = if_render
    self.liftThresh = liftThresh
    if rotMode == 'fixed':
        self.action_space = Box(
            np.array([-1, -1, -1, -1]),
            np.array([1, 1, 1, 1]),
        )
    elif rotMode == 'rotz':
        self.action_rot_scale = 1. / 50
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi, -1]),
            np.array([1, 1, 1, np.pi, 1]),
        )
    elif rotMode == 'quat':
        self.action_space = Box(
            np.array([-1, -1, -1, 0, -1, -1, -1, -1]),
            np.array([1, 1, 1, 2 * np.pi, 1, 1, 1, 1]),
        )
    else:
        self.action_space = Box(
            np.array([-1, -1, -1, -np.pi / 2, -np.pi / 2, 0, -1]),
            np.array([1, 1, 1, np.pi / 2, np.pi / 2, np.pi * 2, 1]),
        )
    self.obj_and_goal_space = Box(
        np.array(obj_low),
        np.array(obj_high),
    )
    self.goal_space = Box(np.array(goal_low), np.array(goal_high))
    if not multitask and self.obs_type == 'with_goal_id':
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low, np.zeros(len(tasks)))),
            np.hstack((self.hand_high, obj_high, np.ones(len(tasks)))),
        )
    elif not multitask and self.obs_type == 'plain':
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low)),
            np.hstack((self.hand_high, obj_high)),
        )
    elif not multitask and self.obs_type == 'with_goal':
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low, goal_low)),
            np.hstack((self.hand_high, obj_high, goal_high)),
        )
    else:
        self.observation_space = Box(
            np.hstack((self.hand_low, obj_low, goal_low, np.zeros(multitask_num))),
            np.hstack((self.hand_high, obj_high, goal_high, np.zeros(multitask_num))),
        )
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        tasks=[{'goal': [0, 0.7, 0.02], 'obj_init_pos': [0, 0.6, 0.02]}],
        goal_low=None,
        goal_high=None,
        hand_init_pos=(0, 0.4, 0.05),
        rewMode='posPlace',
        **kwargs
):
    self.quick_init(locals())
    SawyerXYZEnv.__init__(
        self,
        model_name=self.model_name,
        **kwargs
    )
    if obj_low is None:
        obj_low = self.hand_low
    if goal_low is None:
        goal_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    if goal_high is None:
        goal_high = self.hand_high
    self.max_path_length = 150
    self.tasks = tasks
    self.num_tasks = len(tasks)
    self.hand_init_pos = np.array(hand_init_pos)
    self.action_space = Box(
        np.array([-1, -1, -1, -1, -1]),
        np.array([1, 1, 1, 1, 1]),
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
    )
    self.goal_space = Box(goal_low, goal_high)
    self.observation_space = Dict([
        ('state_observation', self.hand_and_obj_space),
        ('desired_goal', self.goal_space),
    ])
    self.reset()
def __init__(
        self,
        goal_low=(0,),
        goal_high=(1.0472,),
        action_reward_scale=0,
        reward_type='angle_difference',
        indicator_threshold=(.02, .03),
        fix_goal=False,
        fixed_goal=(-.25,),
        reset_free=False,
        fixed_hand_z=0.12,
        hand_low=(-0.1, 0.45, 0.15),
        hand_high=(0., 0.65, .225),
        target_pos_scale=1,
        target_angle_scale=1,
        min_angle=0,
        max_angle=1.0472,
        xml_path='sawyer_xyz/sawyer_door_pull_hook.xml',
        **sawyer_xyz_kwargs
):
    self.quick_init(locals())
    self.model_name = get_asset_full_path(xml_path)
    SawyerXYZEnv.__init__(
        self,
        self.model_name,
        hand_low=hand_low,
        hand_high=hand_high,
        **sawyer_xyz_kwargs
    )
    MultitaskEnv.__init__(self)
    self.reward_type = reward_type
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self.goal_space = Box(
        np.array(goal_low),
        np.array(goal_high),
        dtype=np.float32,
    )
    self._state_goal = None
    self.fixed_hand_z = fixed_hand_z
    self.action_space = Box(
        np.array([-1, -1, -1]),
        np.array([1, 1, 1]),
        dtype=np.float32,
    )
    self.state_space = Box(
        np.concatenate((hand_low, [min_angle])),
        np.concatenate((hand_high, [max_angle])),
        dtype=np.float32,
    )
    self.observation_space = Dict([
        ('observation', self.state_space),
        ('desired_goal', self.goal_space),
        ('achieved_goal', self.goal_space),
        ('state_observation', self.state_space),
        ('state_desired_goal', self.goal_space),
        ('state_achieved_goal', self.goal_space),
    ])
    self.action_reward_scale = action_reward_scale
    self.target_pos_scale = target_pos_scale
    self.target_angle_scale = target_angle_scale
    self.reset_free = reset_free
    self.door_angle_idx = self.model.get_joint_qpos_addr('doorjoint')
    # Reset once with reset_free enabled so the env does not start in a
    # weird position, then restore the requested setting.
    self.reset_free = True
    self.reset()
    self.reset_free = reset_free
def __init__(
        self,
        filename='sawyer_multiobj.xml',
        mode_rel=np.array([True, True, True, True, False]),
        num_objects=3,
        object_mass=1,
        friction=1.0,
        finger_sensors=True,
        maxlen=0.12,
        minlen=0.01,
        preload_obj_dict=None,
        object_meshes=['Bowl', 'GlassBowl', 'LotusBowl01', 'ElephantBowl', 'RuggedBowl'],
        obj_classname=None,
        block_height=0.02,
        block_width=0.02,
        cylinder_radius=0.05,
        viewer_image_height=84,
        viewer_image_width=84,
        skip_first=100,
        substeps=100,
        init_hand_xyz=(-0.00031741, 0.24092109, 0.08816302),
        randomize_initial_pos=False,
        state_goal=None,
        randomize_goal_at_reset=True,
        hand_z_position=0.089,
        fix_z=True,
        fix_gripper=True,
        fix_rotation=True,
        fix_reset_pos=True,
        do_render=False,
        match_orientation=False,
        workspace_low=np.array([-0.31, 0.24, 0]),
        workspace_high=np.array([0.32, 0.7, 0.14]),
        hand_low=np.array([-0.31, 0.24, 0.0]),
        hand_high=np.array([0.32, 0.7, 0.14]),
        env_seed=0,
):
    self.quick_init(locals())
    self.seed(env_seed)
    base_filename = asset_base_path + filename
    friction_params = (friction, 0.1, 0.02)
    self.obj_stat_prop = create_object_xml(
        base_filename, num_objects, object_mass, friction_params,
        object_meshes, finger_sensors, maxlen, minlen, preload_obj_dict,
        obj_classname, block_height, block_width, cylinder_radius)
    gen_xml = create_root_xml(base_filename)
    SawyerXYZEnv.__init__(
        self,
        model_name=gen_xml,
        mocap_low=hand_low,
        mocap_high=hand_high,
    )
    clean_xml(gen_xml)
    if self.sim.model.nmocap > 0 and self.sim.model.eq_data is not None:
        for i in range(self.sim.model.eq_data.shape[0]):
            if self.sim.model.eq_type[i] == mujoco_py.const.EQ_WELD:
                # Define the xyz + quat of the mocap relative to the hand.
                self.sim.model.eq_data[i, :] = np.array(
                    [0., 0., 0., 1., 0., 0., 0.])
    self._base_sdim, self._base_adim, self.mode_rel = 5, 5, mode_rel
    self.hand_low = np.array(hand_low)
    self.hand_high = np.array(hand_high)
    self.workspace_low = np.array(workspace_low)
    self.workspace_high = np.array(workspace_high)
    # Shrink the object workspace by the cylinder radius in x and y so that
    # objects stay fully inside the workspace.
    self.object_low = self.workspace_low.copy()
    self.object_high = self.workspace_high.copy()
    self.object_low[:2] += cylinder_radius
    self.object_high[:2] -= cylinder_radius
    self.action_scale = 5 / 100
    self.num_objects, self.skip_first, self.substeps = num_objects, skip_first, substeps
    self.randomize_initial_pos = randomize_initial_pos
    self.finger_sensors, self._maxlen = finger_sensors, maxlen
    self._threshold = 0
    self._previous_target_qpos, self._n_joints = None, 7
    self._object_pos = np.zeros((num_objects, 7))
    self._reset_xyz = np.zeros((num_objects, 3))
    self._reset_quat = np.zeros((num_objects, 4))
    self._reset_pos = np.copy(self._object_pos)
    self._object_names = ['obj_' + str(i) for i in range(num_objects)]
    self._initialized = False
    self._state_goal = state_goal
    self.hand_z_position = hand_z_position
    self.fix_z = fix_z
    self.fix_rotation = fix_rotation
    self.fix_gripper = fix_gripper
    self._randomize_goal_at_reset = randomize_goal_at_reset
    self.do_render = do_render
    self.init_hand_xyz = np.array(init_hand_xyz)
    self.match_orientation = match_orientation
    self._object_reset_pos = self.sample_object_positions()
    self.goal_dim_per_object = 7 if match_orientation else 3
    self.cylinder_radius = cylinder_radius
    # Action bounds always include (x, y); z, gripper, and rotation entries
    # are appended only when the corresponding fix_* flag is False, so the
    # action space is 2- to 5-dimensional.
    action_bounds = [1, 1]
    if not fix_z:
        action_bounds.append(1)
    if not fix_gripper:
        action_bounds.append(1)
    if not fix_rotation:
        action_bounds.append(1)
    action_bounds = np.array(action_bounds)
    self.action_space = Box(-action_bounds, action_bounds, dtype=np.float32)
    self.hand_space = Box(self.hand_low, self.hand_high, dtype=np.float32)
    obs_dim = 3 * num_objects + 3
    obs_space = Box(np.array([-1] * obs_dim), np.array([1] * obs_dim), dtype=np.float32)
    goal_space = Box(
        np.array([-1] * self.goal_dim_per_object * num_objects),
        np.array([1] * self.goal_dim_per_object * num_objects),
        dtype=np.float32,
    )
    self.observation_space = Dict([
        ('observation', obs_space),
        ('desired_goal', goal_space),
        ('achieved_goal', goal_space),
        ('state_observation', obs_space),
        ('state_desired_goal', goal_space),
        ('state_achieved_goal', goal_space),
    ])
    self._state_goal = self.sample_goal()
    self.fix_reset_pos = fix_reset_pos
    self.reset()
    self.reset()
def __init__(
        self,
        obj_low=None,
        obj_high=None,
        reward_type='hand_and_obj_distance',
        indicator_threshold=0.06,
        obj_init_positions=((0, 0.6, 0.02),),
        random_init=False,
        fix_goal=False,
        fixed_goal=(0.15, 0.6, 0.055, -0.15, 0.6),
        goal_low=None,
        goal_high=None,
        reset_free=False,
        hide_goal_markers=False,
        oracle_reset_prob=0.0,
        presampled_goals=None,
        num_goals_presampled=1000,
        p_obj_in_hand=.75,
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        model_name=self.model_name,
        **kwargs
    )
    if obj_low is None:
        obj_low = self.hand_low
    if obj_high is None:
        obj_high = self.hand_high
    self.obj_low = obj_low
    self.obj_high = obj_high
    if goal_low is None:
        goal_low = np.hstack((self.hand_low, obj_low))
    if goal_high is None:
        goal_high = np.hstack((self.hand_high, obj_high))
    self.reward_type = reward_type
    self.random_init = random_init
    self.p_obj_in_hand = p_obj_in_hand
    self.indicator_threshold = indicator_threshold
    self.obj_init_z = obj_init_positions[0][2]
    self.obj_init_positions = np.array(obj_init_positions)
    self.last_obj_pos = self.obj_init_positions[0]
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.reset_free = reset_free
    self.oracle_reset_prob = oracle_reset_prob
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(
        np.array([-1, -1, -1, -1]),
        np.array([1, 1, 1, 1]),
        dtype=np.float32
    )
    self.hand_and_obj_space = Box(
        np.hstack((self.hand_low, obj_low)),
        np.hstack((self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.hand_space = Box(
        self.hand_low,
        self.hand_high,
        dtype=np.float32
    )
    self.gripper_and_hand_and_obj_space = Box(
        np.hstack(([0.0], self.hand_low, obj_low)),
        np.hstack(([0.04], self.hand_high, obj_high)),
        dtype=np.float32
    )
    self.observation_space = Dict([
        ('observation', self.gripper_and_hand_and_obj_space),
        ('desired_goal', self.hand_and_obj_space),
        ('achieved_goal', self.hand_and_obj_space),
        ('state_observation', self.gripper_and_hand_and_obj_space),
        ('state_desired_goal', self.hand_and_obj_space),
        ('state_achieved_goal', self.hand_and_obj_space),
        ('proprio_observation', self.hand_space),
        ('proprio_desired_goal', self.hand_space),
        ('proprio_achieved_goal', self.hand_space),
    ])
    self.hand_reset_pos = np.array([0, .6, .2])
    if presampled_goals is not None:
        self._presampled_goals = presampled_goals
        self.num_goals_presampled = len(
            list(self._presampled_goals.values())[0])
    else:
        # presampled_goals will be created when sample_goal is first called
        self._presampled_goals = None
        self.num_goals_presampled = num_goals_presampled
    self.picked_up_object = False
    self.train_pickups = 0
    self.eval_pickups = 0
    self.cur_mode = 'train'
    self.reset()
def __init__(
        self,
        reward_type='puck_distance',
        norm_order=2,
        indicator_threshold=0.06,
        fix_goal=False,
        fixed_goal=(0.0, 0.6, 0.02, -0.15, 0.6),
        puck_low=None,
        puck_high=None,
        hand_low=(-0.25, 0.3, 0.02),
        hand_high=(0.25, .8, .02),
        goal_low=(-0.25, 0.3),
        goal_high=(0.25, 0.8),
        hide_goal_markers=False,
        init_puck_z=0.02,
        reset_free=False,
        init_puck_pos=(0.0, .65),
        mode='train',
        **kwargs
):
    self.quick_init(locals())
    MultitaskEnv.__init__(self)
    SawyerXYZEnv.__init__(
        self,
        hand_low=hand_low,
        hand_high=hand_high,
        model_name=self.model_name,
        **kwargs
    )
    if puck_low is None:
        puck_low = self.hand_low[:2]
    if puck_high is None:
        puck_high = self.hand_high[:2]
    puck_low = np.array(puck_low)
    puck_high = np.array(puck_high)
    self.puck_low = puck_low
    self.puck_high = puck_high
    if goal_low is None:
        goal_low = np.array(puck_low)
    if goal_high is None:
        goal_high = np.array(puck_high)
    self.goal_low = np.array(goal_low)
    self.goal_high = np.array(goal_high)
    self.reward_type = reward_type
    self.norm_order = norm_order
    self.indicator_threshold = indicator_threshold
    self.fix_goal = fix_goal
    self.fixed_goal = np.array(fixed_goal)
    self._state_goal = None
    self.hide_goal_markers = hide_goal_markers
    self.action_space = Box(np.array([-1, -1, -1]), np.array([1, 1, 1]))
    self.hand_and_puck_space = Box(
        np.hstack((self.hand_low, puck_low)),
        np.hstack((self.hand_high, puck_high)),
    )
    self.puck_space = Box(puck_low, puck_high)
    self.hand_space = Box(self.hand_low, self.hand_high)
    self.observation_space = Dict([
        ('observation', self.hand_and_puck_space),
        ('desired_goal', self.puck_space),
        ('achieved_goal', self.puck_space),
        ('state_observation', self.hand_and_puck_space),
        ('state_desired_goal', self.puck_space),
        ('state_achieved_goal', self.puck_space),
    ])
    self.init_puck_z = init_puck_z
    self.reset_free = reset_free
    self.puck_pos = self.get_puck_pos()[:2]
    self.mode(mode)
    self.init_puck_pos = np.array(init_puck_pos)
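# Hedged end-to-end sketch. `SawyerPushPuckEnv` is a hypothetical class name
# standing in for whichever class owns the constructor above; it is an
# assumption, since class names are not shown in this file. The constructor
# defines a 3-dim action Box, so interaction looks like the commented code
# below (left commented to avoid import-time side effects).
#
#     env = SawyerPushPuckEnv(reward_type='puck_distance', fix_goal=True)
#     obs = env.reset()
#     for _ in range(50):
#         obs, reward, done, info = env.step(env.action_space.sample())
#         puck_goal = obs['state_desired_goal']   # 2-dim puck goal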