def __init__(self, mujoco_arena, mujoco_robot, mujoco_objects, initializer=None, visual_objects=None):
    """
    Args:
        mujoco_arena: MJCF model of robot workspace
        mujoco_robot: MJCF model of robot model
        mujoco_objects: a list of MJCF models of physical objects
        initializer: placement sampler to initialize object positions.
        visual_objects: (added for pushing task goal,) names of objects that
            should have conaffinity and contype set to 0
    """
    super().__init__()
    self.merge_arena(mujoco_arena)
    self.merge_robot(mujoco_robot)
    # Fix: the original default was a mutable list ([]), which is created
    # once at function-definition time and shared by every call. Use None
    # as the sentinel and build a fresh list per call instead.
    if visual_objects is None:
        visual_objects = []
    if mujoco_objects is not None:
        self.merge_objects(mujoco_objects, visual_objects)
    if initializer is None:
        initializer = UniformRandomSampler()
    # NOTE(review): if mujoco_objects is None, self.mujoco_objects must be
    # populated elsewhere (e.g. by merge_arena) or the line below raises —
    # confirm against the base class.
    mjcfs = list(self.mujoco_objects.values())
    self.initializer = initializer
    self.initializer.setup(mjcfs, self.table_top_offset, self.table_size)
def __init__(
        self,
        mujoco_arena,
        mujoco_robot,
        mujoco_objects,
        initializer=None,
        box_pos_array=None,
        box_quat_array=None):
    """
    Args:
        mujoco_arena: MJCF model of robot workspace
        mujoco_robot: MJCF model of robot model
        mujoco_objects: a list of MJCF models of physical objects
        initializer: placement sampler to initialize object positions.
        box_pos_array: list of initial box positions; defaults to a single
            hard-coded pose.
        box_quat_array: list of initial box quaternions; defaults to a single
            hard-coded rotation (element order assumed (w, x, y, z) — TODO
            confirm against the consumer of this attribute).
    """
    super().__init__()
    # Fix: the original defaults were mutable objects (a list containing an
    # np.array, and a nested list) evaluated once at import time and shared
    # across every instance; build fresh per-call defaults instead.
    if box_pos_array is None:
        box_pos_array = [np.array([0.53522776, -0.0287869, 0.82162434])]
    if box_quat_array is None:
        box_quat_array = [[0.8775825618903728, 0, 0, 0.479425538604203]]
    # added for custom initialization of box
    self.box_pos_array = box_pos_array
    self.box_quat_array = box_quat_array
    # --------------------------
    self.merge_arena(mujoco_arena)
    self.merge_robot(mujoco_robot)
    self.merge_objects(mujoco_objects)
    if initializer is None:
        initializer = UniformRandomSampler()
    mjcfs = list(self.mujoco_objects.values())
    self.initializer = initializer
    self.initializer.setup(mjcfs, self.table_top_offset, self.table_size)
def __init__( self, gripper_type_right="TwoFingerGripper", gripper_type_left="LeftTwoFingerGripper", table_full_size=(0.8, 0.8, 0.8), table_friction=(1., 5e-3, 1e-4), use_object_obs=True, reward_shaping=True, **kwargs ): """ Args: gripper_type_right (str): type of gripper used on the right hand. gripper_type_lefft (str): type of gripper used on the right hand. table_full_size (3-tuple): x, y, and z dimensions of the table. table_friction (3-tuple): the three mujoco friction parameters for the table. use_object_obs (bool): if True, include object (pot) information in the observation. reward_shaping (bool): if True, use dense rewards. Inherits the Baxter environment; refer to other parameters described there. """ # initialize the pot self.pot = PotWithHandlesObject() self.mujoco_objects = OrderedDict([("pot", self.pot)]) # settings for table top self.table_full_size = table_full_size self.table_friction = table_friction # whether to use ground-truth object states self.use_object_obs = use_object_obs # reward configuration self.reward_shaping = reward_shaping self.object_initializer = UniformRandomSampler( x_range=(-0.15, -0.04), y_range=(-0.015, 0.015), z_rotation=(-0.15 * np.pi, 0.15 * np.pi), ensure_object_boundary_in_range=False, ) super().__init__( gripper_left=gripper_type_left, gripper_right=gripper_type_right, **kwargs )
def __init__(self,
             use_object_obs=True,
             reward_shaping=True,
             placement_initializer=None,
             object_obs_process=True,
             **kwargs):
    """
    Args:
        use_object_obs (bool): if True, include object (cube) information in
            the observation.

        reward_shaping (bool): if True, use dense rewards.

        placement_initializer (ObjectPositionSampler instance): if provided,
            will be used to place objects on every reset, else a
            UniformRandomSampler is used by default.

        object_obs_process (bool): if True, process the object observation to
            get a task_state. Setting this to False is useful when some
            transformation (eg. noise) need to be done to object observation
            raw data prior to the processing.
    """
    # observation and reward configuration
    self.use_object_obs = use_object_obs
    self.reward_shaping = reward_shaping
    self.object_obs_process = object_obs_process

    # object placement initializer: the caller's sampler wins, otherwise
    # fall back to a small uniform region with no z rotation
    self.placement_initializer = (
        placement_initializer
        if placement_initializer
        else UniformRandomSampler(
            x_range=[-0.03, 0.03],
            y_range=[-0.03, 0.03],
            ensure_object_boundary_in_range=False,
            z_rotation=None,
        )
    )

    # geometry / physics constants used for the first initialization
    self.table_full_size = (0.8, 0.8, 0.8)
    self.table_friction = (1.0, 0.005, 0.0001)
    self.boxobject_size = (0.02, 0.02, 0.02)
    self.boxobject_friction = (1.0, 0.005, 0.0001)
    self.boxobject_density = 100.

    super().__init__(gripper_visualization=True, **kwargs)
def __init__(
    self,
    mujoco_arena,
    mujoco_robots,
    mujoco_objects,
    visual_objects=None,
    initializer=None,
):
    """Merge the arena, every robot, and the (visual) object groups into one
    task model and set up the placement initializer."""
    super().__init__()

    # Cleared here; presumably populated while merging the arena — the
    # sampler setup below reads floor_pos/floor_size.
    self.arena = None
    self.floor_pos = None
    self.floor_size = None
    self.merge_arena(mujoco_arena)
    for robot in mujoco_robots:
        self.merge_robot(robot)

    if initializer is None:
        initializer = UniformRandomSampler()
    mujoco_objects = collections.OrderedDict() if mujoco_objects is None else mujoco_objects
    visual_objects = collections.OrderedDict() if visual_objects is None else visual_objects

    assert isinstance(mujoco_objects, collections.OrderedDict)
    assert isinstance(visual_objects, collections.OrderedDict)

    # Work on private copies so the caller's containers are never mutated.
    mujoco_objects = deepcopy(mujoco_objects)
    visual_objects = deepcopy(visual_objects)

    # xml manifestations of all objects
    self.merge_objects(mujoco_objects)
    self.merge_objects(visual_objects, is_visual=True)

    every_object = collections.OrderedDict(**mujoco_objects, **visual_objects)
    self.mujoco_objects = mujoco_objects
    self.visual_objects = visual_objects

    self.initializer = initializer
    self.initializer.setup(every_object, self.floor_pos, self.floor_size)
def __init__(self, mujoco_arena, mujoco_robot, mujoco_objects, initializer=None):
    """
    Args:
        mujoco_arena: MJCF model of robot workspace
        mujoco_robot: MJCF model of robot model
        mujoco_objects: a list of MJCF models of physical objects
        initializer: placement sampler to initialize object positions;
            a UniformRandomSampler is created when omitted.
    """
    super().__init__()

    # Merge workspace, robot, and objects into a single model.
    self.merge_arena(mujoco_arena)
    self.merge_robot(mujoco_robot)
    self.merge_objects(mujoco_objects)

    if initializer is None:
        initializer = UniformRandomSampler()
    self.initializer = initializer
    object_models = list(self.mujoco_objects.values())
    self.initializer.setup(object_models, self.table_top_offset, self.table_size)
def __init__(
    self,
    gripper_type="PandaGripper",
    table_full_size=(0.8, 0.8, 0.8),
    table_friction=(1., 5e-3, 1e-4),
    use_camera_obs=True,
    use_object_obs=True,
    reward_shaping=False,
    placement_initializer=None,
    gripper_visualization=False,
    use_indicator_object=False,
    has_renderer=False,
    has_offscreen_renderer=True,
    render_collision_mesh=False,
    render_visual_mesh=True,
    control_freq=10,
    horizon=1000,
    ignore_done=False,
    camera_name="frontview",
    camera_height=256,
    camera_width=256,
    camera_depth=False,
):
    """
    Args:
        gripper_type (str): gripper model name handed to the gripper factory.
        table_full_size (3-tuple): x, y, and z dimensions of the table.
        table_friction (3-tuple): the three mujoco friction parameters for
            the table.
        use_camera_obs (bool): include a rendered image in every observation.
        use_object_obs (bool): include object (cube) state in the observation.
        reward_shaping (bool): use dense rewards when True.
        placement_initializer (ObjectPositionSampler instance): used to place
            objects on every reset; a UniformRandomSampler is used when not
            provided.
        gripper_visualization (bool): enable gripper visualization (useful
            for teleoperation).
        use_indicator_object (bool): set up an indicator object for debugging.
        has_renderer (bool): render the simulation in a viewer instead of
            headless mode.
        has_offscreen_renderer (bool): use off-screen rendering.
        render_collision_mesh (bool): render collision meshes in camera.
        render_visual_mesh (bool): render visual meshes in camera.
        control_freq (float): control signals received per second; sets the
            simulation time that passes between action inputs.
        horizon (int): every episode lasts exactly @horizon timesteps.
        ignore_done (bool): never terminate (ignore @horizon) when True.
        camera_name (str): camera to render; must be set if @use_camera_obs.
        camera_height (int): camera frame height.
        camera_width (int): camera frame width.
        camera_depth (bool): render RGB-D when True, RGB otherwise.
    """
    # table geometry and contact parameters
    self.table_full_size = table_full_size
    self.table_friction = table_friction

    # observation / reward configuration
    self.use_object_obs = use_object_obs
    self.reward_shaping = reward_shaping

    # placement sampler: caller-supplied, else a small uniform region with a
    # random z rotation
    if not placement_initializer:
        placement_initializer = UniformRandomSampler(
            x_range=[-0.03, 0.03],
            y_range=[-0.03, 0.03],
            ensure_object_boundary_in_range=False,
            z_rotation=True,
        )
    self.placement_initializer = placement_initializer

    super().__init__(
        gripper_type=gripper_type,
        gripper_visualization=gripper_visualization,
        use_indicator_object=use_indicator_object,
        has_renderer=has_renderer,
        has_offscreen_renderer=has_offscreen_renderer,
        render_collision_mesh=render_collision_mesh,
        render_visual_mesh=render_visual_mesh,
        control_freq=control_freq,
        horizon=horizon,
        ignore_done=ignore_done,
        use_camera_obs=use_camera_obs,
        camera_name=camera_name,
        camera_height=camera_height,
        camera_width=camera_width,
        camera_depth=camera_depth,
    )
def __init__(
    self,
    robots,
    env_configuration="single-arm-opposed",
    controller_configs=None,
    gripper_types="default",
    gripper_visualizations=False,
    initialization_noise="default",
    table_full_size=(0.8, 0.8, 0.05),
    table_friction=(1., 5e-3, 1e-4),
    use_camera_obs=True,
    use_object_obs=True,
    reward_scale=1.0,
    reward_shaping=False,
    placement_initializer=None,
    use_indicator_object=False,
    has_renderer=False,
    has_offscreen_renderer=True,
    render_camera="frontview",
    render_collision_mesh=False,
    render_visual_mesh=True,
    control_freq=10,
    horizon=1000,
    ignore_done=False,
    hard_reset=True,
    camera_names="agentview",
    camera_heights=256,
    camera_widths=256,
    camera_depths=False,
):
    """Set up the task; the requested robots are validated against the
    environment configuration before anything else."""
    # Robot-set validation must happen first.
    self.env_configuration = env_configuration
    self._check_robot_configuration(robots)

    # table geometry
    self.table_full_size = table_full_size
    self.table_friction = table_friction

    # reward configuration
    self.reward_scale = reward_scale
    self.reward_shaping = reward_shaping

    # ground-truth object observations
    self.use_object_obs = use_object_obs

    # Placement sampler: the caller's, else a small uniform region with the
    # rotation limited to +/- pi/3 radians.
    self.placement_initializer = (
        placement_initializer
        if placement_initializer
        else UniformRandomSampler(
            x_range=[-0.03, 0.03],
            y_range=[-0.03, 0.03],
            ensure_object_boundary_in_range=False,
            rotation=(-np.pi / 3, np.pi / 3),
        )
    )

    super().__init__(
        robots=robots,
        controller_configs=controller_configs,
        gripper_types=gripper_types,
        gripper_visualizations=gripper_visualizations,
        initialization_noise=initialization_noise,
        use_camera_obs=use_camera_obs,
        use_indicator_object=use_indicator_object,
        has_renderer=has_renderer,
        has_offscreen_renderer=has_offscreen_renderer,
        render_camera=render_camera,
        render_collision_mesh=render_collision_mesh,
        render_visual_mesh=render_visual_mesh,
        control_freq=control_freq,
        horizon=horizon,
        ignore_done=ignore_done,
        hard_reset=hard_reset,
        camera_names=camera_names,
        camera_heights=camera_heights,
        camera_widths=camera_widths,
        camera_depths=camera_depths,
    )
def __init__(
    self,
    mujoco_arena,
    mujoco_robots,
    mujoco_objects_on_table,
    other_mujoco_objects=None,
    visual_mujoco_objects_on_table=None,
    other_visual_mujoco_objects=None,
    initializer=None,
):
    """Merge arena, robots, and the four object groups into one task model;
    only the on-table groups are handed to the placement sampler."""
    super().__init__()

    self.merge_arena(mujoco_arena)
    for robot in mujoco_robots:
        self.merge_robot(robot)

    if initializer is None:
        initializer = UniformRandomSampler()

    # Normalize the optional groups to empty ordered dicts.
    if other_mujoco_objects is None:
        other_mujoco_objects = collections.OrderedDict()
    if visual_mujoco_objects_on_table is None:
        visual_mujoco_objects_on_table = collections.OrderedDict()
    if other_visual_mujoco_objects is None:
        other_visual_mujoco_objects = collections.OrderedDict()

    # Copy every group so the caller's containers are never mutated.
    mujoco_objects_on_table = deepcopy(mujoco_objects_on_table)
    other_mujoco_objects = deepcopy(other_mujoco_objects)
    visual_mujoco_objects_on_table = deepcopy(visual_mujoco_objects_on_table)
    other_visual_mujoco_objects = deepcopy(other_visual_mujoco_objects)

    for group in (mujoco_objects_on_table, other_mujoco_objects,
                  visual_mujoco_objects_on_table,
                  other_visual_mujoco_objects):
        assert isinstance(group, collections.OrderedDict)

    # xml manifestations of all objects
    self.objects_on_table = []
    self.other_objects = []
    self.visual_objects_on_table = []
    self.other_visual_objects = []
    self.merge_objects(mujoco_objects_on_table, other_mujoco_objects,
                       visual_mujoco_objects_on_table,
                       other_visual_mujoco_objects)

    self.mujoco_objects_on_table = mujoco_objects_on_table
    self.other_mujoco_objects = other_mujoco_objects
    self.visual_mujoco_objects_on_table = visual_mujoco_objects_on_table
    self.other_visual_mujoco_objects = other_visual_mujoco_objects

    # Physical + visual on-table objects are placed by the sampler.
    tabletop_objects = collections.OrderedDict(
        **mujoco_objects_on_table, **visual_mujoco_objects_on_table)
    self.initializer = initializer
    self.initializer.setup(tabletop_objects, self.table_top_offset,
                           self.table_size)
"type": "uniform" } ] # Model params noise_scale = args.noise_scale num_resnet_layers = 50 sequence_length = args.sequence_length feature_layer_nums = (9, ) # Define placement initializer for object rotation_axis = 'y' if args.obj_name == 'hammer' else 'z' placement_initializer = UniformRandomSampler( x_range=[-0.35, 0.35], y_range=[-0.35, 0.35], ensure_object_boundary_in_range=False, rotation=None, rotation_axis=rotation_axis, ) if args.use_placement_initializer else None # Define robosuite model controller_config = suite.load_controller_config( default_controller=args.controller) env = suite.make( args.env, robots=args.robots, #has_renderer=True, #render_camera=camera_name, has_offscreen_renderer=True, camera_depths=args.use_depth, use_camera_obs=True,
from robosuite.environments.sawyer_test_pick import SawyerPrimitivePick

# Arm start-pose randomization range (set to None to disable).
random_arm_init = [-0.0, 0.0]
# random_arm_init = None
instructive = 1.0  # instructive-reset setting consumed by the env
decay = 3e-6       # decay applied to the instructive setting

# Keep the object almost centred: +/- 1 cm in x/y, no z rotation.
cube_sampler = UniformRandomSampler(
    x_range=[-0.01, 0.01],
    y_range=[-0.01, 0.01],
    ensure_object_boundary_in_range=False,
    z_rotation=None,
)

env = SawyerPrimitivePick(
    instructive=instructive,
    decay=decay,
    random_arm_init=random_arm_init,
    has_renderer=True,
    reward_shaping=False,
    horizon=150,
    ignore_done=True,
    placement_initializer=cube_sampler,
    has_offscreen_renderer=False,
    use_camera_obs=False,
    use_object_obs=True,
    control_freq=100,
)

# Alternative environment kept for reference:
# reward_shaping = False
# horizon = 500
# env = SawyerReachPick(
#     # playable params
#     random_arm_init=random_arm_init,
#     has_renderer=True,
#     reward_shaping=reward_shaping,
#     horizon=horizon,
def __init__(
    self,
    robots,
    controller_configs=None,
    gripper_types="WipingGripper",
    gripper_visualizations=False,
    initialization_noise="default",
    use_camera_obs=True,
    use_object_obs=True,
    reward_scale=1.0,
    reward_shaping=True,
    placement_initializer=None,
    use_indicator_object=False,
    has_renderer=False,
    has_offscreen_renderer=True,
    render_camera="frontview",
    render_collision_mesh=False,
    render_visual_mesh=True,
    control_freq=10,
    horizon=1000,
    ignore_done=False,
    hard_reset=True,
    camera_names="agentview",
    camera_heights=256,
    camera_widths=256,
    camera_depths=False,
    task_config=None,
):
    """Configure the Wipe environment.

    Reads every task-specific parameter (rewards, table, thresholds, misc
    flags) from ``task_config`` (falling back to DEFAULT_WIPE_CONFIG), then
    delegates the generic environment setup to the parent constructor.
    Only the WipingGripper is supported.
    """
    # First, verify that only one robot is being inputted
    self._check_robot_configuration(robots)

    # Assert that the gripper type is None
    assert gripper_types == "WipingGripper",\
        "Tried to specify gripper other than WipingGripper in Wipe environment!"

    # Get config
    self.task_config = task_config if task_config is not None else DEFAULT_WIPE_CONFIG

    # Set task-specific parameters
    # settings for the reward
    self.reward_scale = reward_scale
    self.reward_shaping = reward_shaping
    self.arm_limit_collision_penalty = self.task_config[
        'arm_limit_collision_penalty']
    self.wipe_contact_reward = self.task_config['wipe_contact_reward']
    self.unit_wiped_reward = self.task_config['unit_wiped_reward']
    self.ee_accel_penalty = self.task_config['ee_accel_penalty']
    self.excess_force_penalty_mul = self.task_config[
        'excess_force_penalty_mul']
    self.distance_multiplier = self.task_config['distance_multiplier']
    self.distance_th_multiplier = self.task_config[
        'distance_th_multiplier']
    # Final reward computation
    # So that is better to finish that to stay touching the table for 100 steps
    # The 0.5 comes from continuous_distance_reward at 0. If something
    # changes, this may change as well
    self.task_complete_reward = 50 * (self.wipe_contact_reward + 0.5)
    # Verify that the distance multiplier is not greater than the task complete reward
    assert self.task_complete_reward > self.distance_multiplier,\
        "Distance multiplier cannot be greater than task complete reward!"

    # settings for table top
    self.table_full_size = self.task_config['table_full_size']
    self.table_offset = self.task_config['table_offset']
    self.table_friction = self.task_config['table_friction']
    self.table_friction_std = self.task_config['table_friction_std']
    self.table_height = self.task_config['table_height']
    self.table_height_std = self.task_config['table_height_std']
    self.line_width = self.task_config['line_width']
    self.two_clusters = self.task_config['two_clusters']
    self.coverage_factor = self.task_config['coverage_factor']
    self.num_sensors = self.task_config['num_sensors']

    # settings for thresholds
    self.contact_threshold = self.task_config['contact_threshold']
    self.touch_threshold = self.task_config['touch_threshold']
    # NOTE(review): this reads 'touch_threshold', not 'pressure_threshold' —
    # confirm it is intentional and not a copy-paste slip.
    self.pressure_threshold = self.task_config['touch_threshold']
    self.pressure_threshold_max = self.task_config[
        'pressure_threshold_max']

    # misc settings
    self.print_results = self.task_config['print_results']
    self.get_info = self.task_config['get_info']
    self.use_robot_obs = self.task_config['use_robot_obs']
    self.early_terminations = self.task_config['early_terminations']

    # Scale reward if desired (see reward method for details)
    self.reward_normalization_factor = horizon / \
        (self.num_sensors * self.unit_wiped_reward +
         horizon * (self.wipe_contact_reward + self.task_complete_reward))

    # ee resets
    self.ee_force_bias = np.zeros(3)
    self.ee_torque_bias = np.zeros(3)

    # set other wipe-specific attributes
    self.wiped_sensors = []
    self.collisions = 0
    self.f_excess = 0
    self.metadata = []
    self.spec = "spec"

    # whether to include and use ground-truth object states
    self.use_object_obs = use_object_obs

    # object placement initializer
    if placement_initializer:
        self.placement_initializer = placement_initializer
    else:
        self.placement_initializer = UniformRandomSampler(
            x_range=[0, 0.2],
            y_range=[0, 0.2],
            ensure_object_boundary_in_range=False,
            rotation=None)

    super().__init__(
        robots=robots,
        controller_configs=controller_configs,
        gripper_types=gripper_types,
        gripper_visualizations=gripper_visualizations,
        initialization_noise=initialization_noise,
        use_camera_obs=use_camera_obs,
        use_indicator_object=use_indicator_object,
        has_renderer=has_renderer,
        has_offscreen_renderer=has_offscreen_renderer,
        render_camera=render_camera,
        render_collision_mesh=render_collision_mesh,
        render_visual_mesh=render_visual_mesh,
        control_freq=control_freq,
        horizon=horizon,
        ignore_done=ignore_done,
        hard_reset=hard_reset,
        camera_names=camera_names,
        camera_heights=camera_heights,
        camera_widths=camera_widths,
        camera_depths=camera_depths,
    )
def __init__(
    self,
    robots,
    env_configuration="single-arm-opposed",
    controller_configs=None,
    gripper_types="default",
    gripper_visualizations=False,
    initialization_noise="default",
    prehensile=True,
    table_full_size=(0.8, 1.2, 0.05),
    table_friction=(1., 5e-3, 1e-4),
    use_camera_obs=True,
    use_object_obs=True,
    reward_scale=1.0,
    reward_shaping=False,
    placement_initializer=None,
    use_indicator_object=False,
    has_renderer=False,
    has_offscreen_renderer=True,
    render_camera="frontview",
    render_collision_mesh=False,
    render_visual_mesh=True,
    control_freq=10,
    horizon=1000,
    ignore_done=False,
    hard_reset=True,
    camera_names="agentview",
    camera_heights=256,
    camera_widths=256,
    camera_depths=False,
):
    """Two-arm task setup; ``prehensile`` selects whether the hammer starts
    on the table (True) or in the gripper (False)."""
    # Validate the robot set against the environment configuration first.
    self.env_configuration = env_configuration
    self._check_robot_configuration(robots)

    # Task settings
    self.prehensile = prehensile

    # Table: the simulated ("true") table is only a quarter of the nominal
    # width; the offset is derived from the nominal width.
    self.table_full_size = table_full_size
    self.table_true_size = list(table_full_size)
    self.table_true_size[1] *= 0.25  # true size will only be partially wide
    self.table_friction = table_friction
    self.table_offset = [0, self.table_full_size[1] * (-3 / 8), 0.8]

    # Rewards
    self.reward_scale = reward_scale
    self.reward_shaping = reward_shaping
    # height above the table surface at which the hammer counts as lifted
    self.height_threshold = 0.1

    # ground-truth object observations
    self.use_object_obs = use_object_obs

    # Placement sampler. Default: rotate about y when the hammer starts on
    # the table, about z when it starts in the gripper.
    if placement_initializer:
        self.placement_initializer = placement_initializer
    else:
        axis = 'y' if self.prehensile else 'z'
        self.placement_initializer = UniformRandomSampler(
            x_range=[-0.1, 0.1],
            y_range=[-0.05, 0.05],
            ensure_object_boundary_in_range=False,
            rotation=None,
            rotation_axis=axis,
        )

    super().__init__(
        robots=robots,
        controller_configs=controller_configs,
        gripper_types=gripper_types,
        gripper_visualizations=gripper_visualizations,
        initialization_noise=initialization_noise,
        use_camera_obs=use_camera_obs,
        use_indicator_object=use_indicator_object,
        has_renderer=has_renderer,
        has_offscreen_renderer=has_offscreen_renderer,
        render_camera=render_camera,
        render_collision_mesh=render_collision_mesh,
        render_visual_mesh=render_visual_mesh,
        control_freq=control_freq,
        horizon=horizon,
        ignore_done=ignore_done,
        hard_reset=hard_reset,
        camera_names=camera_names,
        camera_heights=camera_heights,
        camera_widths=camera_widths,
        camera_depths=camera_depths,
    )
def simulate_policy(args):
    """Roll out a trained compositional policy on the Sawyer environment
    selected by ``args.domain`` and print the average return.

    Builds the environment together with its sub-level (primitive) policies,
    loads the top-level policy snapshot from ``args.file``, runs
    ``args.trials`` rollouts, and returns the constructed environment.
    """
    paths = []  # kept for the commented-out trajectory-saving code below
    sub_level_policies = []        # instantiated primitive policies
    sub_level_policies_paths = []  # snapshot paths, or "ik<axis>" tags
    render = True
    if args.domain == 'sawyer-reach':
        print("Composition Reach")
        goal_size = 0
        sub_level_policies_paths.append("ikx")
        sub_level_policies_paths.append("iky")
        sub_level_policies_paths.append("ikz")
        random_arm_init = [-0.1, 0.1]
        reward_shaping = True
        horizon = 250
        env = normalize(
            CRLWrapper(
                IKWrapper(
                    SawyerReach(
                        # playable params
                        random_arm_init=random_arm_init,
                        has_renderer=render,
                        reward_shaping=reward_shaping,
                        horizon=horizon,
                        # constant params
                        has_offscreen_renderer=False,
                        use_camera_obs=False,
                        use_object_obs=True,
                        control_freq=100,
                    ))))
    elif args.domain == 'sawyer-reach-pick':
        print("Composition Reach and Pick")
        goal_size = 3
        sub_level_policies_paths.append(
            "log/prim/pick/2019-08-14-18-18-17-370041-PDT/itr_2000.pkl")
        sub_level_policies_paths.append(
            "log/prim/reach/2019-08-20-15-52-39-191438-PDT/itr_2000.pkl")
        random_arm_init = [-0.0001, 0.0001]
        reward_shaping = False
        horizon = 1000
        env = normalize(
            CRLWrapper(
                SawyerReachPick(
                    # playable params
                    random_arm_init=random_arm_init,
                    has_renderer=render,
                    reward_shaping=reward_shaping,
                    horizon=horizon,
                    # constant params
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    use_object_obs=True,
                    control_freq=100,
                )))
    elif args.domain == 'sawyer-reach-pick-simple':
        print("Composition Reach and Pick Simple")
        goal_size = 3
        sub_level_policies_paths.append(
            "log/prim/pick/2019-08-14-18-18-17-370041-PDT/itr_2000.pkl")
        sub_level_policies_paths.append(
            "log/prim/reach/2019-08-20-15-52-39-191438-PDT/itr_2000.pkl")
        render = True
        random_arm_init = [-0.0001, 0.0001]
        reward_shaping = False
        horizon = 500
        env = normalize(
            CRLWrapper(
                SawyerReachPick(
                    # playable params
                    random_arm_init=random_arm_init,
                    has_renderer=render,
                    reward_shaping=reward_shaping,
                    horizon=horizon,
                    placement_initializer=UniformRandomSampler(
                        x_range=[-0.02, 0.02],
                        y_range=[-0.02, 0.02],
                        ensure_object_boundary_in_range=False,
                        z_rotation=None,
                    ),
                    # constant params
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    use_object_obs=True,
                    control_freq=100,
                )))
    else:
        # NOTE(review): on this path env / horizon / goal_size stay unbound,
        # so the rollout code below raises NameError — confirm this branch
        # is unreachable for valid args.
        print("NO DOMAIN")

    with tf.Session() as sess:
        # Instantiate each primitive: "ik<axis>" tags become IK policies,
        # everything else is loaded from a joblib snapshot. Each lives in
        # its own numbered variable scope to avoid name clashes.
        for p in range(0, len(sub_level_policies_paths)):
            path = sub_level_policies_paths[p]
            if path[:2] == 'ik':
                with tf.variable_scope(str(p), reuse=False):
                    # path[2] is the axis character ('x', 'y' or 'z')
                    policy_snapshot = IK_Policy(path[2])
                    sub_level_policies.append(policy_snapshot)
            else:
                with tf.variable_scope(str(p), reuse=False):
                    policy_snapshot = joblib.load(sub_level_policies_paths[p])
                    sub_level_policies.append(policy_snapshot["policy"])

        # Snapshots may store the policy directly or inside the algorithm.
        data = joblib.load(args.file)
        if 'algo' in data.keys():
            policy = data['algo'].policy
            # env = data['algo'].env
        else:
            policy = data['policy']
            # env = data['env']

        mean_reward = []
        with policy.deterministic(args.deterministic):
            Npath = 0
            while Npath < args.trials:
                path = rollout(env,
                               policy,
                               sub_level_policies,
                               path_length=horizon - 1,
                               g=goal_size)
                # if np.sum(path["rewards"])>=args.reward_min:
                #     print(np.sum(path["rewards"]))
                #     paths.append(dict(
                #         observations= env.observation_space.flatten_n(path["observations"]),
                #         actions= env.observation_space.flatten_n(path["actions"]),
                #         rewards= tensor_utils.stack_tensor_list(path["rewards"]),
                #         env_infos= path["env_infos"],
                #         agent_infos= path["agent_infos"],
                #         goal = path["goal"]
                #     ))
                Npath += 1
                mean_reward.append(np.sum(path["rewards"]))
        print("Average Return {}+/-{}".format(np.mean(mean_reward),
                                              np.std(mean_reward)))
    # fileName = osp.join(args.FilePath,'itr.pkl')
    # joblib.dump(paths,fileName, compress=3)
    return env
def __init__(
    self,
    instructive=0.0,
    decay=0.0,
    random_arm_init=None,
    gripper_type="TwoFingerGripper",
    table_full_size=(0.8, 0.8, 0.6),
    table_friction=(1., 5e-3, 1e-4),
    use_camera_obs=True,
    use_object_obs=True,
    reward_shaping=False,
    placement_initializer=None,
    gripper_visualization=False,
    use_indicator_object=False,
    has_renderer=False,
    has_offscreen_renderer=True,
    render_collision_mesh=False,
    render_visual_mesh=True,
    control_freq=10,
    horizon=1000,
    ignore_done=False,
    camera_name="frontview",
    camera_height=256,
    camera_width=256,
    camera_depth=False,
):
    """Sawyer pick environment with optional instructive resets.

    Args:
        instructive (float): instructive-reset setting (0 disables); the
            exact semantics are defined where it is consumed.
        decay (float): decay applied to the instructive setting over time.
        random_arm_init (list or None): range for random arm initialization.
        gripper_type (str): gripper model name for the gripper factory.
        table_full_size (3-tuple): x, y, and z dimensions of the table.
        table_friction (3-tuple): mujoco friction parameters for the table.
        use_camera_obs (bool): include a rendered image in every observation.
        use_object_obs (bool): include object state in the observation.
        reward_shaping (bool): use dense rewards when True.
        placement_initializer (ObjectPositionSampler instance): custom
            placement sampler; a zero-width UniformRandomSampler is used
            when not provided.
        gripper_visualization (bool): enable gripper visualization.
        use_indicator_object (bool): add an indicator object for debugging.
        has_renderer (bool): render in an on-screen viewer.
        has_offscreen_renderer (bool): use off-screen rendering.
        render_collision_mesh (bool): render collision meshes in camera.
        render_visual_mesh (bool): render visual meshes in camera.
        control_freq (float): control signals per simulated second.
        horizon (int): timesteps per episode.
        ignore_done (bool): never terminate when True.
        camera_name (str): camera to render; required with camera obs.
        camera_height (int): camera frame height.
        camera_width (int): camera frame width.
        camera_depth (bool): render RGB-D when True, RGB otherwise.
    """
    # instructive-reset bookkeeping
    self.random_arm_init = random_arm_init
    self.instructive = instructive
    self.instructive_counter = 0
    self.eval_mode = False
    self.decay = decay

    # table settings
    self.table_full_size = table_full_size
    self.table_friction = table_friction

    # observation / reward configuration
    self.use_object_obs = use_object_obs
    self.reward_shaping = reward_shaping

    # Placement sampler; the default ranges are zero-width, i.e. the object
    # always starts at the same spot.
    if placement_initializer:
        self.placement_initializer = placement_initializer
    else:
        self.placement_initializer = UniformRandomSampler(
            x_range=[-0.00, 0.00],
            y_range=[-0.00, 0.00],
            ensure_object_boundary_in_range=False,
            z_rotation=None,
        )

    # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal
    # dim: 3, 2, 3, 3
    # low = -np.ones(11) * np.inf
    # high = np.ones(11) * np.inf
    # self.observation_space = spaces.Box(low=low, high=high)

    # IK controller backed by the bundled bullet data.
    self.controller = SawyerIKController(
        bullet_data_path=os.path.join(robosuite.models.assets_root,
                                      "bullet_data"),
        robot_jpos_getter=self._robot_jpos_getter,
    )

    super().__init__(
        gripper_type=gripper_type,
        gripper_visualization=gripper_visualization,
        use_indicator_object=use_indicator_object,
        has_renderer=has_renderer,
        has_offscreen_renderer=has_offscreen_renderer,
        render_collision_mesh=render_collision_mesh,
        render_visual_mesh=render_visual_mesh,
        control_freq=control_freq,
        horizon=horizon,
        ignore_done=ignore_done,
        use_camera_obs=use_camera_obs,
        camera_name=camera_name,
        camera_height=camera_height,
        camera_width=camera_width,
        camera_depth=camera_depth,
    )
def __init__(
    self,
    robots,
    controller_configs=None,
    gripper_types="default",
    gripper_visualizations=False,
    initialization_noise="default",
    use_latch=True,
    use_camera_obs=True,
    use_object_obs=True,
    reward_scale=1.0,
    reward_shaping=False,
    placement_initializer=None,
    use_indicator_object=False,
    has_renderer=False,
    has_offscreen_renderer=True,
    render_camera="frontview",
    render_collision_mesh=False,
    render_visual_mesh=True,
    control_freq=10,
    horizon=1000,
    ignore_done=False,
    hard_reset=True,
    camera_names="agentview",
    camera_heights=256,
    camera_widths=256,
    camera_depths=False,
):
    """Single-robot task setup; ``use_latch`` is stored with the reward
    configuration."""
    # Only a single robot is supported — validate first.
    self._check_robot_configuration(robots)

    # settings for table top (hardcoded since it's not an essential part of
    # the environment)
    self.table_full_size = (0.8, 0.3, 0.05)
    self.table_offset = (-0.2, -0.35, 0.8)

    # reward configuration
    self.use_latch = use_latch
    self.reward_scale = reward_scale
    self.reward_shaping = reward_shaping

    # ground-truth object observations
    self.use_object_obs = use_object_obs

    # Placement sampler: default is a narrow band with a z rotation drawn
    # from [-pi/2 - 0.25, -pi/2] radians.
    self.placement_initializer = (
        placement_initializer
        if placement_initializer
        else UniformRandomSampler(
            x_range=[0.07, 0.09],
            y_range=[-0.01, 0.01],
            ensure_object_boundary_in_range=False,
            rotation=(-np.pi / 2. - 0.25, -np.pi / 2.),
            rotation_axis='z',
        )
    )

    super().__init__(
        robots=robots,
        controller_configs=controller_configs,
        gripper_types=gripper_types,
        gripper_visualizations=gripper_visualizations,
        initialization_noise=initialization_noise,
        use_camera_obs=use_camera_obs,
        use_indicator_object=use_indicator_object,
        has_renderer=has_renderer,
        has_offscreen_renderer=has_offscreen_renderer,
        render_camera=render_camera,
        render_collision_mesh=render_collision_mesh,
        render_visual_mesh=render_visual_mesh,
        control_freq=control_freq,
        horizon=horizon,
        ignore_done=ignore_done,
        hard_reset=hard_reset,
        camera_names=camera_names,
        camera_heights=camera_heights,
        camera_widths=camera_widths,
        camera_depths=camera_depths,
    )
def run_experiment(variant):
    """Build the environment for ``args.domain``, assemble the SAC learner
    (twin Q-functions, value function, pointer policy over sub-level
    primitives), and train it.

    Raises:
        ValueError: if the requested domain is not available.
    """
    sub_level_policies_paths = []  # primitive snapshots, or "ik<axis>" tags
    args = arg()
    if args.domain == 'sawyer-reach':
        print("Composition Reach")
        goal_size = 0
        sub_level_policies_paths.append("ikx")
        sub_level_policies_paths.append("iky")
        sub_level_policies_paths.append("ikz")
        random_arm_init = [-0.1, 0.1]
        render = False
        reward_shaping = True
        horizon = 250
        env = normalize(
            CRLWrapper(
                IKWrapper(
                    SawyerReach(
                        # playable params
                        random_arm_init=random_arm_init,
                        has_renderer=render,
                        reward_shaping=reward_shaping,
                        horizon=horizon,
                        # constant params
                        has_offscreen_renderer=False,
                        use_camera_obs=False,
                        use_object_obs=True,
                        control_freq=100,
                    ))))
        ep_length = 1500
    elif args.domain == 'sawyer-reach-pick':
        print("Composition Reach and Pick")
        goal_size = 3
        sub_level_policies_paths.append(
            "log/prim/pick/2019-08-14-18-18-17-370041-PDT/itr_2000.pkl")
        sub_level_policies_paths.append(
            "log/prim/reach/2019-08-20-15-52-39-191438-PDT/itr_2000.pkl")
        render = False
        random_arm_init = [-0.0001, 0.0001]
        reward_shaping = False
        horizon = 1000
        env = normalize(
            CRLWrapper(
                SawyerReachPick(
                    # playable params
                    random_arm_init=random_arm_init,
                    has_renderer=render,
                    reward_shaping=reward_shaping,
                    horizon=horizon,
                    # constant params
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    use_object_obs=True,
                    control_freq=100,
                )))
        ep_length = 1500
    elif args.domain == 'sawyer-reach-pick-simple':
        print("Composition Reach and Pick Simple")
        goal_size = 3
        sub_level_policies_paths.append(
            "log/prim/pick/2019-08-14-18-18-17-370041-PDT/itr_2000.pkl")
        sub_level_policies_paths.append(
            "log/prim/reach/2019-08-20-15-52-39-191438-PDT/itr_2000.pkl")
        render = False
        random_arm_init = [-0.0001, 0.0001]
        reward_shaping = False
        horizon = 500
        env = normalize(
            CRLWrapper(
                SawyerReachPick(
                    # playable params
                    random_arm_init=random_arm_init,
                    has_renderer=render,
                    reward_shaping=reward_shaping,
                    horizon=horizon,
                    placement_initializer=UniformRandomSampler(
                        x_range=[-0.01, 0.01],
                        y_range=[-0.01, 0.01],
                        ensure_object_boundary_in_range=False,
                        z_rotation=None,
                    ),
                    # constant params
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    use_object_obs=True,
                    control_freq=100,
                )))
        ep_length = 3000
    else:
        raise ValueError("Domain not available")

    # Replay buffer: demo-aware variant when training from demonstrations.
    if args.demo:
        pool = DemoReplayBuffer(
            env_spec=env.spec,
            max_replay_buffer_size=1e6,
            seq_len=len(sub_level_policies_paths),
        )
    else:
        pool = SimpleReplayBuffer(
            env_spec=env.spec,
            max_replay_buffer_size=1e6,
            seq_len=len(sub_level_policies_paths),
        )

    sampler = SimpleSampler(
        max_path_length=horizon - 1,  # should be same as horizon
        min_pool_size=1000,
        batch_size=256)

    base_kwargs = dict(
        epoch_length=ep_length,
        n_epochs=5e3,
        # n_epochs=5,
        n_train_repeat=1,
        eval_render=False,
        eval_n_episodes=1,
        eval_deterministic=True,
        sampler=sampler,
        use_demos=args.demo,
    )

    # Networks: twin Q-functions + value function, all with two hidden
    # layers of width M.
    M = 128
    qf1 = NNQFunction(env_spec=env.spec,
                      hidden_layer_sizes=(M, M),
                      name='qf1')
    qf2 = NNQFunction(env_spec=env.spec,
                      hidden_layer_sizes=(M, M),
                      name='qf2')
    vf = NNVFunction(env_spec=env.spec, hidden_layer_sizes=(M, M))

    initial_exploration_policy = UniformPolicy(env_spec=env.spec)

    policy = GaussianPtrPolicy(
        env_spec=env.spec,
        hidden_layer_sizes=(M, M),
        reparameterize=True,
        reg=1e-3,
    )

    algorithm = SAC(
        base_kwargs=base_kwargs,
        env=env,
        g=goal_size,
        policy=policy,
        sub_level_policies_paths=sub_level_policies_paths,
        initial_exploration_policy=initial_exploration_policy,
        pool=pool,
        qf1=qf1,
        qf2=qf2,
        vf=vf,
        lr=3e-4,
        scale_reward=5,
        discount=0.99,
        tau=0.005,
        reparameterize=True,
        target_update_interval=1,
        action_prior='uniform',
        save_full_state=False,
    )

    algorithm._sess.run(tf.global_variables_initializer())
    algorithm.train()