Example #1
    def __init__(self,
                 mujoco_arena,
                 mujoco_robot,
                 mujoco_objects,
                 initializer=None,
                 visual_objects=[]):
        """
        Args:
            mujoco_arena: MJCF model of robot workspace
            mujoco_robot: MJCF model of robot model
            mujoco_objects: a list of MJCF models of physical objects
            initializer: placement sampler to initialize object positions.
            visual_objects: (added for the pushing task goal) names of objects whose conaffinity and contype should be set to 0
        """
        super().__init__()

        self.merge_arena(mujoco_arena)
        self.merge_robot(mujoco_robot)

        if mujoco_objects is not None:
            self.merge_objects(mujoco_objects, visual_objects)
            if initializer is None:
                initializer = UniformRandomSampler()
            mjcfs = [x for _, x in self.mujoco_objects.items()]

            self.initializer = initializer
            self.initializer.setup(mjcfs, self.table_top_offset,
                                   self.table_size)
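
For context, a constructor like this is usually driven by a few lines of glue code that build the arena, robot, and object models and hand them over. A minimal sketch is below; it assumes older robosuite-style model classes (TableArena, Sawyer, BoxObject, TableTopTask) and the UniformRandomSampler used throughout these examples, so exact names and signatures may differ between versions.

from collections import OrderedDict

from robosuite.models.arenas import TableArena
from robosuite.models.robots import Sawyer
from robosuite.models.objects import BoxObject
from robosuite.models.tasks import TableTopTask, UniformRandomSampler

# Build the MJCF models that the task will merge (illustrative sizes).
arena = TableArena(table_full_size=(0.8, 0.8, 0.8))
robot = Sawyer()
cube = BoxObject(size=[0.02, 0.02, 0.02])
objects = OrderedDict([("cube", cube)])

# The task merges the models and wires up the placement sampler; when no
# initializer is passed it falls back to UniformRandomSampler(), as above.
task = TableTopTask(arena, robot, objects, initializer=UniformRandomSampler())
pos_arr, quat_arr = task.initializer.sample()  # object poses for the next reset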
Example #2
    def __init__(
            self,
            mujoco_arena,
            mujoco_robot,
            mujoco_objects,
            initializer=None,
            box_pos_array=[np.array([0.53522776, -0.0287869, 0.82162434])],
            box_quat_array=[[0.8775825618903728, 0, 0, 0.479425538604203]]):
        """
        Args:
            mujoco_arena: MJCF model of robot workspace
            mujoco_robot: MJCF model of robot model
            mujoco_objects: a list of MJCF models of physical objects
            initializer: placement sampler to initialize object positions.
        """
        super().__init__()

        # added for custom initialization of box
        self.box_pos_array = box_pos_array
        self.box_quat_array = box_quat_array
        # --------------------------

        self.merge_arena(mujoco_arena)
        self.merge_robot(mujoco_robot)
        self.merge_objects(mujoco_objects)
        if initializer is None:
            initializer = UniformRandomSampler()
        mjcfs = [x for _, x in self.mujoco_objects.items()]

        self.initializer = initializer
        self.initializer.setup(mjcfs, self.table_top_offset, self.table_size)
Example #3
    def __init__(
        self,
        gripper_type_right="TwoFingerGripper",
        gripper_type_left="LeftTwoFingerGripper",
        table_full_size=(0.8, 0.8, 0.8),
        table_friction=(1., 5e-3, 1e-4),
        use_object_obs=True,
        reward_shaping=True,
        **kwargs
    ):
        """
        Args:

            gripper_type_right (str): type of gripper used on the right hand.

            gripper_type_left (str): type of gripper used on the left hand.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_object_obs (bool): if True, include object (pot) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

        Inherits the Baxter environment; refer to other parameters described there.
        """

        # initialize the pot
        self.pot = PotWithHandlesObject()
        self.mujoco_objects = OrderedDict([("pot", self.pot)])

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        self.object_initializer = UniformRandomSampler(
            x_range=(-0.15, -0.04),
            y_range=(-0.015, 0.015),
            z_rotation=(-0.15 * np.pi, 0.15 * np.pi),
            ensure_object_boundary_in_range=False,
        )

        super().__init__(
            gripper_left=gripper_type_left, gripper_right=gripper_type_right, **kwargs
        )
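
This constructor matches the two-armed lifting setup in older robosuite (BaxterLift-style). A hedged instantiation sketch follows; the class name and import path are assumptions rather than something stated in the snippet.

from robosuite.environments.baxter_lift import BaxterLift  # assumed module path

env = BaxterLift(
    gripper_type_right="TwoFingerGripper",
    gripper_type_left="LeftTwoFingerGripper",
    table_full_size=(0.8, 0.8, 0.8),
    reward_shaping=True,
    use_object_obs=True,
    # remaining keyword arguments are forwarded to the Baxter base environment
    has_renderer=False,
    has_offscreen_renderer=False,
    use_camera_obs=False,
)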
Example #4
    def __init__(self,
                 use_object_obs=True,
                 reward_shaping=True,
                 placement_initializer=None,
                 object_obs_process=True,
                 **kwargs):
        """
        Args:

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            object_obs_process (bool): if True, process the object observation to get a task_state.
                Setting this to False is useful when some transformation (e.g. noise) needs to be applied
                to the raw object observation data prior to processing.
        """

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # for first initialization
        self.table_full_size = (0.8, 0.8, 0.8)
        self.table_friction = (1.0, 0.005, 0.0001)
        self.boxobject_size = (0.02, 0.02, 0.02)
        self.boxobject_friction = (1.0, 0.005, 0.0001)
        self.boxobject_density = 100.

        self.object_obs_process = object_obs_process

        super().__init__(gripper_visualization=True, **kwargs)
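
Because placement_initializer defaults to a UniformRandomSampler over a small square, callers that want a different reset distribution pass in their own sampler. A hedged sketch follows, reusing the sampler signature from the snippet above; SawyerLift is used only as a stand-in for the class this __init__ belongs to, and its import path is an assumption.

from robosuite.environments.sawyer_lift import SawyerLift  # assumed module path
from robosuite.models.tasks import UniformRandomSampler

tight_sampler = UniformRandomSampler(
    x_range=[-0.01, 0.01],              # narrower than the default [-0.03, 0.03]
    y_range=[-0.01, 0.01],
    ensure_object_boundary_in_range=False,
    z_rotation=None,                    # same rotation setting as the default above
)

env = SawyerLift(
    placement_initializer=tight_sampler,  # replaces the default sampler
    use_object_obs=True,
    reward_shaping=True,
    has_renderer=False,
    has_offscreen_renderer=False,
    use_camera_obs=False,
)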
Example #5
    def __init__(
        self,
        mujoco_arena,
        mujoco_robots,
        mujoco_objects,
        visual_objects=None,
        initializer=None,
    ):
        super().__init__()

        self.arena = None
        self.floor_pos = None
        self.floor_size = None

        self.merge_arena(mujoco_arena)
        for mujoco_robot in mujoco_robots:
            self.merge_robot(mujoco_robot)

        if initializer is None:
            initializer = UniformRandomSampler()

        if mujoco_objects is None:
            mujoco_objects = collections.OrderedDict()
        if visual_objects is None:
            visual_objects = collections.OrderedDict()

        assert isinstance(mujoco_objects, collections.OrderedDict)
        assert isinstance(visual_objects, collections.OrderedDict)

        mujoco_objects = deepcopy(mujoco_objects)
        visual_objects = deepcopy(visual_objects)

        # xml manifestations of all objects
        self.merge_objects(mujoco_objects)
        self.merge_objects(visual_objects, is_visual=True)

        merged_objects = collections.OrderedDict(**mujoco_objects,
                                                 **visual_objects)
        self.mujoco_objects = mujoco_objects
        self.visual_objects = visual_objects

        self.initializer = initializer
        self.initializer.setup(merged_objects, self.floor_pos, self.floor_size)
Example #6
    def __init__(self, mujoco_arena, mujoco_robot, mujoco_objects, initializer=None):
        """
        Args:
            mujoco_arena: MJCF model of robot workspace
            mujoco_robot: MJCF model of robot model
            mujoco_objects: a list of MJCF models of physical objects
            initializer: placement sampler to initialize object positions.
        """
        super().__init__()

        self.merge_arena(mujoco_arena)
        self.merge_robot(mujoco_robot)
        self.merge_objects(mujoco_objects)
        if initializer is None:
            initializer = UniformRandomSampler()
        mjcfs = [x for _, x in self.mujoco_objects.items()]

        self.initializer = initializer
        self.initializer.setup(mjcfs, self.table_top_offset, self.table_size)
Example #7
    def __init__(
        self,
        gripper_type="PandaGripper",
        table_full_size=(0.8, 0.8, 0.8),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                per second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                z_rotation=True,
            )

        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )
Example #8
    def __init__(
        self,
        robots,
        env_configuration="single-arm-opposed",
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        table_full_size=(0.8, 0.8, 0.05),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that correct number of robots are being inputted
        self.env_configuration = env_configuration
        self._check_robot_configuration(robots)

        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.03, 0.03],
                y_range=[-0.03, 0.03],
                ensure_object_boundary_in_range=False,
                rotation=(-np.pi / 3, np.pi / 3),
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )
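
Constructors in this robots-list style are normally reached through robosuite's factory (suite.make, as used further below) rather than instantiated directly. A hedged sketch follows; the environment name "TwoArmLift" and the robot names are assumptions chosen to match the sampler settings above.

import robosuite as suite

env = suite.make(
    "TwoArmLift",                            # assumed registry name
    robots=["Panda", "Sawyer"],              # one robot per arm
    env_configuration="single-arm-opposed",
    has_renderer=False,
    has_offscreen_renderer=True,
    use_camera_obs=True,
    camera_names="agentview",
    camera_heights=256,
    camera_widths=256,
    control_freq=10,
    horizon=1000,
)
obs = env.reset()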
Example #9
    def __init__(
        self,
        mujoco_arena,
        mujoco_robots,
        mujoco_objects_on_table,
        other_mujoco_objects=None,
        visual_mujoco_objects_on_table=None,
        other_visual_mujoco_objects=None,
        initializer=None,
    ):
        super().__init__()

        self.merge_arena(mujoco_arena)
        for mujoco_robot in mujoco_robots:
            self.merge_robot(mujoco_robot)

        if initializer is None:
            initializer = UniformRandomSampler()

        if other_mujoco_objects is None:
            other_mujoco_objects = collections.OrderedDict()

        if visual_mujoco_objects_on_table is None:
            visual_mujoco_objects_on_table = collections.OrderedDict()

        if other_visual_mujoco_objects is None:
            other_visual_mujoco_objects = collections.OrderedDict()

        mujoco_objects_on_table = deepcopy(mujoco_objects_on_table)
        other_mujoco_objects = deepcopy(other_mujoco_objects)
        visual_mujoco_objects_on_table = deepcopy(
            visual_mujoco_objects_on_table)
        other_visual_mujoco_objects = deepcopy(other_visual_mujoco_objects)

        assert isinstance(mujoco_objects_on_table, collections.OrderedDict)
        assert isinstance(other_mujoco_objects, collections.OrderedDict)
        assert isinstance(visual_mujoco_objects_on_table,
                          collections.OrderedDict)
        assert isinstance(other_visual_mujoco_objects, collections.OrderedDict)

        # xml manifestations of all objects
        self.objects_on_table = []
        self.other_objects = []
        self.visual_objects_on_table = []
        self.other_visual_objects = []

        self.merge_objects(mujoco_objects_on_table, other_mujoco_objects,
                           visual_mujoco_objects_on_table,
                           other_visual_mujoco_objects)

        merged_objects_on_table = collections.OrderedDict(
            **mujoco_objects_on_table, **visual_mujoco_objects_on_table)

        self.mujoco_objects_on_table = mujoco_objects_on_table
        self.other_mujoco_objects = other_mujoco_objects
        self.visual_mujoco_objects_on_table = visual_mujoco_objects_on_table
        self.other_visual_mujoco_objects = other_visual_mujoco_objects

        self.initializer = initializer
        self.initializer.setup(merged_objects_on_table, self.table_top_offset,
                               self.table_size)
Example #10
            "type": "uniform"
        }
    ]

# Model params
noise_scale = args.noise_scale
num_resnet_layers = 50
sequence_length = args.sequence_length
feature_layer_nums = (9, )

# Define placement initializer for object
rotation_axis = 'y' if args.obj_name == 'hammer' else 'z'
placement_initializer = UniformRandomSampler(
    x_range=[-0.35, 0.35],
    y_range=[-0.35, 0.35],
    ensure_object_boundary_in_range=False,
    rotation=None,
    rotation_axis=rotation_axis,
) if args.use_placement_initializer else None

# Define robosuite model
controller_config = suite.load_controller_config(
    default_controller=args.controller)
env = suite.make(
    args.env,
    robots=args.robots,
    #has_renderer=True,
    #render_camera=camera_name,
    has_offscreen_renderer=True,
    camera_depths=args.use_depth,
    use_camera_obs=True,
Example #11
from robosuite.environments.sawyer_test_pick import SawyerPrimitivePick
random_arm_init = [-0.0, 0.0]
# random_arm_init = None
instructive = 1.0
decay = 3e-6
env = SawyerPrimitivePick(
    instructive=instructive,
    decay=decay,
    random_arm_init=random_arm_init,
    has_renderer=True,
    reward_shaping=False,
    horizon=150,
    ignore_done=True,
    placement_initializer=UniformRandomSampler(
        x_range=[-0.01, 0.01],
        y_range=[-0.01, 0.01],
        ensure_object_boundary_in_range=False,
        z_rotation=None,
    ),
    has_offscreen_renderer=False,
    use_camera_obs=False,
    use_object_obs=True,
    control_freq=100,
)
# reward_shaping = False
# horizon = 500
# env = SawyerReachPick(
#     # playable params
#     random_arm_init=random_arm_init,
#     has_renderer=True,
#     reward_shaping=reward_shaping,
#     horizon=horizon,
Example #12
    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="WipingGripper",
        gripper_visualizations=False,
        initialization_noise="default",
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=True,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
        task_config=None,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # Assert that the gripper type is WipingGripper
        assert gripper_types == "WipingGripper",\
            "Tried to specify gripper other than WipingGripper in Wipe environment!"

        # Get config
        self.task_config = task_config if task_config is not None else DEFAULT_WIPE_CONFIG

        # Set task-specific parameters

        # settings for the reward
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping
        self.arm_limit_collision_penalty = self.task_config[
            'arm_limit_collision_penalty']
        self.wipe_contact_reward = self.task_config['wipe_contact_reward']
        self.unit_wiped_reward = self.task_config['unit_wiped_reward']
        self.ee_accel_penalty = self.task_config['ee_accel_penalty']
        self.excess_force_penalty_mul = self.task_config[
            'excess_force_penalty_mul']
        self.distance_multiplier = self.task_config['distance_multiplier']
        self.distance_th_multiplier = self.task_config[
            'distance_th_multiplier']
        # Final reward computation
        # so that it is better to finish the task than to keep touching the table for 100 steps
        # The 0.5 comes from continuous_distance_reward at 0. If something changes, this may change as well
        self.task_complete_reward = 50 * (self.wipe_contact_reward + 0.5)
        # Verify that the distance multiplier is not greater than the task complete reward
        assert self.task_complete_reward > self.distance_multiplier,\
            "Distance multiplier cannot be greater than task complete reward!"

        # settings for table top
        self.table_full_size = self.task_config['table_full_size']
        self.table_offset = self.task_config['table_offset']
        self.table_friction = self.task_config['table_friction']
        self.table_friction_std = self.task_config['table_friction_std']
        self.table_height = self.task_config['table_height']
        self.table_height_std = self.task_config['table_height_std']
        self.line_width = self.task_config['line_width']
        self.two_clusters = self.task_config['two_clusters']
        self.coverage_factor = self.task_config['coverage_factor']
        self.num_sensors = self.task_config['num_sensors']

        # settings for thresholds
        self.contact_threshold = self.task_config['contact_threshold']
        self.touch_threshold = self.task_config['touch_threshold']
        self.pressure_threshold = self.task_config['touch_threshold']
        self.pressure_threshold_max = self.task_config[
            'pressure_threshold_max']

        # misc settings
        self.print_results = self.task_config['print_results']
        self.get_info = self.task_config['get_info']
        self.use_robot_obs = self.task_config['use_robot_obs']
        self.early_terminations = self.task_config['early_terminations']

        # Scale reward if desired (see reward method for details)
        self.reward_normalization_factor = horizon / \
            (self.num_sensors * self.unit_wiped_reward +
             horizon * (self.wipe_contact_reward + self.task_complete_reward))
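        # Worked example with hypothetical config values (not the real DEFAULT_WIPE_CONFIG):
        # num_sensors = 100, unit_wiped_reward = 10, wipe_contact_reward = 0.01 and
        # horizon = 1000 give task_complete_reward = 50 * (0.01 + 0.5) = 25.5 and
        # reward_normalization_factor = 1000 / (100 * 10 + 1000 * (0.01 + 25.5)) = about 0.038,
        # so the summed per-step rewards are rescaled to roughly the same budget across settings.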

        # ee resets
        self.ee_force_bias = np.zeros(3)
        self.ee_torque_bias = np.zeros(3)

        # set other wipe-specific attributes
        self.wiped_sensors = []
        self.collisions = 0
        self.f_excess = 0
        self.metadata = []
        self.spec = "spec"

        # whether to include and use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[0, 0.2],
                y_range=[0, 0.2],
                ensure_object_boundary_in_range=False,
                rotation=None)

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )
Example #13
    def __init__(
        self,
        robots,
        env_configuration="single-arm-opposed",
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        prehensile=True,
        table_full_size=(0.8, 1.2, 0.05),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that correct number of robots are being inputted
        self.env_configuration = env_configuration
        self._check_robot_configuration(robots)

        # Task settings
        self.prehensile = prehensile

        # settings for table top
        self.table_full_size = table_full_size
        self.table_true_size = list(table_full_size)
        self.table_true_size[1] *= 0.25  # the true table is only a quarter as wide as the nominal size
        self.table_friction = table_friction
        self.table_offset = [0, self.table_full_size[1] * (-3 / 8), 0.8]
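        # With the default table_full_size of (0.8, 1.2, 0.05): the true y-extent becomes
        # 1.2 * 0.25 = 0.3, and table_offset evaluates to [0, 1.2 * (-3 / 8), 0.8] = [0, -0.45, 0.8].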

        # reward configuration
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping
        self.height_threshold = 0.1  # threshold above the table surface at which the hammer is considered lifted

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            # Set rotation about y-axis if hammer starts on table else rotate about z if it starts in gripper
            rotation_axis = 'y' if self.prehensile else 'z'
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.1, 0.1],
                y_range=[-0.05, 0.05],
                ensure_object_boundary_in_range=False,
                rotation=None,
                rotation_axis=rotation_axis,
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )
Example #14
def simulate_policy(args):
    paths = []
    sub_level_policies = []
    sub_level_policies_paths = []

    render = True
    if args.domain == 'sawyer-reach':
        print("Composition Reach")
        goal_size = 0
        sub_level_policies_paths.append("ikx")
        sub_level_policies_paths.append("iky")
        sub_level_policies_paths.append("ikz")
        random_arm_init = [-0.1, 0.1]

        reward_shaping = True
        horizon = 250
        env = normalize(
            CRLWrapper(
                IKWrapper(
                    SawyerReach(
                        # playable params
                        random_arm_init=random_arm_init,
                        has_renderer=render,
                        reward_shaping=reward_shaping,
                        horizon=horizon,

                        # constant params
                        has_offscreen_renderer=False,
                        use_camera_obs=False,
                        use_object_obs=True,
                        control_freq=100,
                    ))))
    elif args.domain == 'sawyer-reach-pick':
        print("Composition Reach and Pick")
        goal_size = 3
        sub_level_policies_paths.append(
            "log/prim/pick/2019-08-14-18-18-17-370041-PDT/itr_2000.pkl")
        sub_level_policies_paths.append(
            "log/prim/reach/2019-08-20-15-52-39-191438-PDT/itr_2000.pkl")

        random_arm_init = [-0.0001, 0.0001]
        reward_shaping = False
        horizon = 1000
        env = normalize(
            CRLWrapper(
                SawyerReachPick(
                    # playable params
                    random_arm_init=random_arm_init,
                    has_renderer=render,
                    reward_shaping=reward_shaping,
                    horizon=horizon,

                    # constant params
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    use_object_obs=True,
                    control_freq=100,
                )))
    elif args.domain == 'sawyer-reach-pick-simple':
        print("Composition Reach and Pick Simple")
        goal_size = 3
        sub_level_policies_paths.append(
            "log/prim/pick/2019-08-14-18-18-17-370041-PDT/itr_2000.pkl")
        sub_level_policies_paths.append(
            "log/prim/reach/2019-08-20-15-52-39-191438-PDT/itr_2000.pkl")

        render = True

        random_arm_init = [-0.0001, 0.0001]
        reward_shaping = False
        horizon = 500
        env = normalize(
            CRLWrapper(
                SawyerReachPick(
                    # playable params
                    random_arm_init=random_arm_init,
                    has_renderer=render,
                    reward_shaping=reward_shaping,
                    horizon=horizon,
                    placement_initializer=UniformRandomSampler(
                        x_range=[-0.02, 0.02],
                        y_range=[-0.02, 0.02],
                        ensure_object_boundary_in_range=False,
                        z_rotation=None,
                    ),
                    # constant params
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    use_object_obs=True,
                    control_freq=100,
                )))
    else:
        print("NO DOMAIN")

    with tf.Session() as sess:
        for p in range(0, len(sub_level_policies_paths)):
            path = sub_level_policies_paths[p]
            if path[:2] == 'ik':
                with tf.variable_scope(str(p), reuse=False):
                    policy_snapshot = IK_Policy(path[2])
                sub_level_policies.append(policy_snapshot)
            else:
                with tf.variable_scope(str(p), reuse=False):
                    policy_snapshot = joblib.load(sub_level_policies_paths[p])
                sub_level_policies.append(policy_snapshot["policy"])

        data = joblib.load(args.file)
        if 'algo' in data.keys():
            policy = data['algo'].policy
            # env = data['algo'].env
        else:
            policy = data['policy']
            # env = data['env']

        mean_reward = []
        with policy.deterministic(args.deterministic):
            Npath = 0
            while Npath < args.trials:
                path = rollout(env,
                               policy,
                               sub_level_policies,
                               path_length=horizon - 1,
                               g=goal_size)
                # if np.sum(path["rewards"])>=args.reward_min:
                #     print(np.sum(path["rewards"]))
                #     paths.append(dict(
                #         observations= env.observation_space.flatten_n(path["observations"]),
                #         actions= env.observation_space.flatten_n(path["actions"]),
                #         rewards= tensor_utils.stack_tensor_list(path["rewards"]),
                #         env_infos= path["env_infos"],
                #         agent_infos= path["agent_infos"],
                #         goal = path["goal"]
                #     ))
                Npath += 1
                mean_reward.append(np.sum(path["rewards"]))
            print("Average Return {}+/-{}".format(np.mean(mean_reward),
                                                  np.std(mean_reward)))
            # fileName = osp.join(args.FilePath,'itr.pkl')
            # joblib.dump(paths,fileName, compress=3)
        return env
Example #15
    def __init__(
        self,
        instructive=0.0,
        decay=0.0,
        random_arm_init=None,
        gripper_type="TwoFingerGripper",
        table_full_size=(0.8, 0.8, 0.6),
        table_friction=(1., 5e-3, 1e-4),
        use_camera_obs=True,
        use_object_obs=True,
        reward_shaping=False,
        placement_initializer=None,
        gripper_visualization=False,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        camera_name="frontview",
        camera_height=256,
        camera_width=256,
        camera_depth=False,
    ):
        """
        Args:

            gripper_type (str): type of gripper, used to instantiate
                gripper models from gripper factory.

            table_full_size (3-tuple): x, y, and z dimensions of the table.

            table_friction (3-tuple): the three mujoco friction parameters for
                the table.

            use_camera_obs (bool): if True, every observation includes a
                rendered image.

            use_object_obs (bool): if True, include object (cube) information in
                the observation.

            reward_shaping (bool): if True, use dense rewards.

            placement_initializer (ObjectPositionSampler instance): if provided, will
                be used to place objects on every reset, else a UniformRandomSampler
                is used by default.

            gripper_visualization (bool): True if using gripper visualization.
                Useful for teleoperation.

            use_indicator_object (bool): if True, sets up an indicator object that
                is useful for debugging.

            has_renderer (bool): If true, render the simulation state in
                a viewer instead of headless mode.

            has_offscreen_renderer (bool): True if using off-screen rendering.

            render_collision_mesh (bool): True if rendering collision meshes
                in camera. False otherwise.

            render_visual_mesh (bool): True if rendering visual meshes
                in camera. False otherwise.

            control_freq (float): how many control signals to receive
                per second. This sets the amount of simulation time
                that passes between every action input.

            horizon (int): Every episode lasts for exactly @horizon timesteps.

            ignore_done (bool): True if never terminating the environment (ignore @horizon).

            camera_name (str): name of camera to be rendered. Must be
                set if @use_camera_obs is True.

            camera_height (int): height of camera frame.

            camera_width (int): width of camera frame.

            camera_depth (bool): True if rendering RGB-D, and RGB otherwise.
        """
        self.random_arm_init = random_arm_init
        self.instructive = instructive
        self.instructive_counter = 0
        self.eval_mode = False
        self.decay = decay
        # settings for table top
        self.table_full_size = table_full_size
        self.table_friction = table_friction

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # reward configuration
        self.reward_shaping = reward_shaping

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[-0.00, 0.00],
                y_range=[-0.00, 0.00],
                ensure_object_boundary_in_range=False,
                z_rotation=None,
            )

        # order: di["eef_pos"] di["gripper_qpos"] di["gripper_to_marker"] self.goal
        # dim: 3, 2, 3, 3
        # low = -np.ones(11) * np.inf
        # high = np.ones(11) * np.inf
        # self.observation_space = spaces.Box(low=low, high=high)

        self.controller = SawyerIKController(
            bullet_data_path=os.path.join(robosuite.models.assets_root,
                                          "bullet_data"),
            robot_jpos_getter=self._robot_jpos_getter,
        )
        super().__init__(
            gripper_type=gripper_type,
            gripper_visualization=gripper_visualization,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            use_camera_obs=use_camera_obs,
            camera_name=camera_name,
            camera_height=camera_height,
            camera_width=camera_width,
            camera_depth=camera_depth,
        )
Example #16
    def __init__(
        self,
        robots,
        controller_configs=None,
        gripper_types="default",
        gripper_visualizations=False,
        initialization_noise="default",
        use_latch=True,
        use_camera_obs=True,
        use_object_obs=True,
        reward_scale=1.0,
        reward_shaping=False,
        placement_initializer=None,
        use_indicator_object=False,
        has_renderer=False,
        has_offscreen_renderer=True,
        render_camera="frontview",
        render_collision_mesh=False,
        render_visual_mesh=True,
        control_freq=10,
        horizon=1000,
        ignore_done=False,
        hard_reset=True,
        camera_names="agentview",
        camera_heights=256,
        camera_widths=256,
        camera_depths=False,
    ):
        # First, verify that only one robot is being inputted
        self._check_robot_configuration(robots)

        # settings for table top (hardcoded since it's not an essential part of the environment)
        self.table_full_size = (0.8, 0.3, 0.05)
        self.table_offset = (-0.2, -0.35, 0.8)

        # reward configuration
        self.use_latch = use_latch
        self.reward_scale = reward_scale
        self.reward_shaping = reward_shaping

        # whether to use ground-truth object states
        self.use_object_obs = use_object_obs

        # object placement initializer
        if placement_initializer:
            self.placement_initializer = placement_initializer
        else:
            self.placement_initializer = UniformRandomSampler(
                x_range=[0.07, 0.09],
                y_range=[-0.01, 0.01],
                ensure_object_boundary_in_range=False,
                rotation=(-np.pi / 2. - 0.25, -np.pi / 2.),
                rotation_axis='z',
            )

        super().__init__(
            robots=robots,
            controller_configs=controller_configs,
            gripper_types=gripper_types,
            gripper_visualizations=gripper_visualizations,
            initialization_noise=initialization_noise,
            use_camera_obs=use_camera_obs,
            use_indicator_object=use_indicator_object,
            has_renderer=has_renderer,
            has_offscreen_renderer=has_offscreen_renderer,
            render_camera=render_camera,
            render_collision_mesh=render_collision_mesh,
            render_visual_mesh=render_visual_mesh,
            control_freq=control_freq,
            horizon=horizon,
            ignore_done=ignore_done,
            hard_reset=hard_reset,
            camera_names=camera_names,
            camera_heights=camera_heights,
            camera_widths=camera_widths,
            camera_depths=camera_depths,
        )
def run_experiment(variant):
    sub_level_policies_paths = []
    args = arg()

    if args.domain == 'sawyer-reach':
        print("Composition Reach")
        goal_size = 0
        sub_level_policies_paths.append("ikx")
        sub_level_policies_paths.append("iky")
        sub_level_policies_paths.append("ikz")
        random_arm_init = [-0.1, 0.1]
        render = False
        reward_shaping = True
        horizon = 250
        env = normalize(
            CRLWrapper(
                IKWrapper(
                    SawyerReach(
                        # playable params
                        random_arm_init=random_arm_init,
                        has_renderer=render,
                        reward_shaping=reward_shaping,
                        horizon=horizon,

                        # constant params
                        has_offscreen_renderer=False,
                        use_camera_obs=False,
                        use_object_obs=True,
                        control_freq=100,
                    ))))
        ep_length = 1500

    elif args.domain == 'sawyer-reach-pick':
        print("Composition Reach and Pick")
        goal_size = 3
        sub_level_policies_paths.append(
            "log/prim/pick/2019-08-14-18-18-17-370041-PDT/itr_2000.pkl")
        sub_level_policies_paths.append(
            "log/prim/reach/2019-08-20-15-52-39-191438-PDT/itr_2000.pkl")

        render = False

        random_arm_init = [-0.0001, 0.0001]
        reward_shaping = False
        horizon = 1000
        env = normalize(
            CRLWrapper(
                SawyerReachPick(
                    # playable params
                    random_arm_init=random_arm_init,
                    has_renderer=render,
                    reward_shaping=reward_shaping,
                    horizon=horizon,

                    # constant params
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    use_object_obs=True,
                    control_freq=100,
                )))
        ep_length = 1500

    elif args.domain == 'sawyer-reach-pick-simple':
        print("Composition Reach and Pick Simple")
        goal_size = 3
        sub_level_policies_paths.append(
            "log/prim/pick/2019-08-14-18-18-17-370041-PDT/itr_2000.pkl")
        sub_level_policies_paths.append(
            "log/prim/reach/2019-08-20-15-52-39-191438-PDT/itr_2000.pkl")

        render = False

        random_arm_init = [-0.0001, 0.0001]
        reward_shaping = False
        horizon = 500
        env = normalize(
            CRLWrapper(
                SawyerReachPick(
                    # playable params
                    random_arm_init=random_arm_init,
                    has_renderer=render,
                    reward_shaping=reward_shaping,
                    horizon=horizon,
                    placement_initializer=UniformRandomSampler(
                        x_range=[-0.01, 0.01],
                        y_range=[-0.01, 0.01],
                        ensure_object_boundary_in_range=False,
                        z_rotation=None,
                    ),
                    # constant params
                    has_offscreen_renderer=False,
                    use_camera_obs=False,
                    use_object_obs=True,
                    control_freq=100,
                )))
        ep_length = 3000
    else:
        raise ValueError("Domain not available")

    if args.demo:
        pool = DemoReplayBuffer(
            env_spec=env.spec,
            max_replay_buffer_size=1e6,
            seq_len=len(sub_level_policies_paths),
        )
    else:
        pool = SimpleReplayBuffer(
            env_spec=env.spec,
            max_replay_buffer_size=1e6,
            seq_len=len(sub_level_policies_paths),
        )

    sampler = SimpleSampler(
        max_path_length=horizon - 1,  # should be same as horizon
        min_pool_size=1000,
        batch_size=256)

    base_kwargs = dict(
        epoch_length=ep_length,
        n_epochs=5e3,
        # n_epochs=5,
        n_train_repeat=1,
        eval_render=False,
        eval_n_episodes=1,
        eval_deterministic=True,
        sampler=sampler,
        use_demos=args.demo,
    )
    M = 128
    qf1 = NNQFunction(env_spec=env.spec, hidden_layer_sizes=(M, M), name='qf1')
    qf2 = NNQFunction(env_spec=env.spec, hidden_layer_sizes=(M, M), name='qf2')
    vf = NNVFunction(env_spec=env.spec, hidden_layer_sizes=(M, M))

    initial_exploration_policy = UniformPolicy(env_spec=env.spec)
    policy = GaussianPtrPolicy(
        env_spec=env.spec,
        hidden_layer_sizes=(M, M),
        reparameterize=True,
        reg=1e-3,
    )

    algorithm = SAC(
        base_kwargs=base_kwargs,
        env=env,
        g=goal_size,
        policy=policy,
        sub_level_policies_paths=sub_level_policies_paths,
        initial_exploration_policy=initial_exploration_policy,
        pool=pool,
        qf1=qf1,
        qf2=qf2,
        vf=vf,
        lr=3e-4,
        scale_reward=5,
        discount=0.99,
        tau=0.005,
        reparameterize=True,
        target_update_interval=1,
        action_prior='uniform',
        save_full_state=False,
    )
    algorithm._sess.run(tf.global_variables_initializer())
    algorithm.train()