Ejemplo n.º 1
0
    def reset(self, deterministic=False):
        """
        Sets initial pose of arm and grippers. Overrides gripper joint configuration if we're using a
        deterministic reset (e.g.: hard reset from xml file)

        Args:
            deterministic (bool): If true, will not randomize initializations within the sim
        """
        # First, run the superclass method to reset the position and controller
        super().reset(deterministic)

        if not deterministic:
            # Now, reset the gripper if necessary
            for arm in self.arms:
                if self.has_gripper[arm]:
                    self.sim.data.qpos[self._ref_gripper_joint_pos_indexes[
                        arm]] = self.gripper[arm].init_qpos

        # Setup buffers to hold recent values
        self.recent_qpos = DeltaBuffer(dim=len(self.joint_indexes))
        self.recent_actions = DeltaBuffer(dim=self.action_dim)
        self.recent_torques = DeltaBuffer(dim=len(self.joint_indexes))

        # Setup arm-specific values
        for arm in self.arms:
            # Update base pos / ori references in controller (technically only needs to be called once)
            self.controller[arm].update_base_pose(self.base_pos, self.base_ori)
            # Setup buffers for eef values
            self.recent_ee_forcetorques[arm] = DeltaBuffer(dim=6)
            self.recent_ee_pose[arm] = DeltaBuffer(dim=7)
            self.recent_ee_vel[arm] = DeltaBuffer(dim=6)
            self.recent_ee_vel_buffer[arm] = RingBuffer(dim=6, length=10)
            self.recent_ee_acc[arm] = DeltaBuffer(dim=6)
Ejemplo n.º 2
0
    def reset(self, deterministic=False):
        """
        Sets initial pose of arm and grippers. Overrides robot joint configuration if we're using a
        deterministic reset (e.g.: hard reset from xml file)

        Args:
            deterministic (bool): If true, will not randomize initializations within the sim

        Raises:
            ValueError: [Invalid noise type]
        """
        init_qpos = np.array(self.init_qpos)
        if not deterministic:
            # Determine noise
            if self.initialization_noise["type"] == "gaussian":
                noise = np.random.randn(len(
                    self.init_qpos)) * self.initialization_noise["magnitude"]
            elif self.initialization_noise["type"] == "uniform":
                noise = np.random.uniform(-1.0, 1.0, len(
                    self.init_qpos)) * self.initialization_noise["magnitude"]
            else:
                raise ValueError(
                    "Error: Invalid noise type specified. Options are 'gaussian' or 'uniform'."
                )
            init_qpos += noise

        # Set initial position in sim
        self.sim.data.qpos[self._ref_joint_pos_indexes] = init_qpos

        # Load controllers
        self._load_controller()

        # Update base pos / ori references
        self.base_pos = self.sim.data.get_body_xpos(self.robot_model.root_body)
        self.base_ori = T.mat2quat(
            self.sim.data.get_body_xmat(self.robot_model.root_body).reshape(
                (3, 3)))

        # Setup buffers to hold recent values
        self.recent_qpos = DeltaBuffer(dim=len(self.joint_indexes))
        self.recent_actions = DeltaBuffer(dim=self.action_dim)
        self.recent_torques = DeltaBuffer(dim=len(self.joint_indexes))
Ejemplo n.º 3
0
    def reset(self, deterministic=False):
        """
        Sets initial pose of arm and grippers. Overrides gripper joint configuration if we're using a
        deterministic reset (e.g.: hard reset from xml file)

        Args:
            deterministic (bool): If true, will not randomize initializations within the sim
        """
        # First, run the superclass method to reset the position and controller
        super().reset(deterministic)

        # Update base pos / ori references in controller
        self.controller.update_base_pose(self.base_pos, self.base_ori)

        # Setup buffers to hold recent values
        self.recent_qpos = DeltaBuffer(dim=len(self.joint_indexes))
        self.recent_actions = DeltaBuffer(dim=self.action_dim)
        self.recent_torques = DeltaBuffer(dim=len(self.joint_indexes))
class SingleArm(Robot):
    """
    Initializes a single-armed robot simulation object.

    Args:
        robot_type (str): Specification for specific robot arm to be instantiated within this env (e.g: "Panda")

        idn (int or str): Unique ID of this robot. Should be different from others

        controller_config (dict): If set, contains relevant controller parameters for creating a custom controller.
            Else, uses the default controller for this specific task

        initial_qpos (sequence of float): If set, determines the initial joint positions of the robot to be
            instantiated for the task

        initialization_noise (dict): Dict containing the initialization noise parameters. The expected keys and
            corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to "None" or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            :Note: Specifying None will automatically create the required dict with "magnitude" set to 0.0

        gripper_type (str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default gripper associated
            within the 'robot' specification. None removes the gripper, and any other (valid) model overrides the
            default gripper

        gripper_visualization (bool): True if using gripper visualization.
            Useful for teleoperation.

        control_freq (float): how many control signals to receive
            in every second. This sets the amount of simulation time
            that passes between every action input.
    """
    def __init__(self,
                 robot_type: str,
                 idn=0,
                 controller_config=None,
                 initial_qpos=None,
                 initialization_noise=None,
                 gripper_type="default",
                 gripper_visualization=False,
                 control_freq=10):

        self.controller = None
        self.controller_config = copy.deepcopy(controller_config)
        self.gripper_type = gripper_type
        self.has_gripper = self.gripper_type is not None
        self.gripper_visualization = gripper_visualization
        self.control_freq = control_freq

        self.gripper = None  # Gripper class
        self.gripper_joints = None  # xml joint names for gripper
        self._ref_gripper_joint_pos_indexes = None  # xml gripper joint position indexes in mjsim
        self._ref_gripper_joint_vel_indexes = None  # xml gripper joint velocity indexes in mjsim
        self._ref_joint_gripper_actuator_indexes = None  # xml gripper (pos) actuator indexes for robot in mjsim
        self.eef_rot_offset = None  # rotation offsets from final arm link to gripper (quat)
        self.eef_site_id = None  # xml element id for eef in mjsim
        self.eef_cylinder_id = None  # xml element id for eef cylinder in mjsim
        self.torques = None  # Current torques being applied

        self.recent_qpos = None  # Current and last robot arm qpos
        self.recent_actions = None  # Current and last action applied
        self.recent_torques = None  # Current and last torques applied
        self.recent_ee_forcetorques = None  # Current and last forces / torques sensed at eef
        self.recent_ee_pose = None  # Current and last eef pose (pos + ori (quat))
        self.recent_ee_vel = None  # Current and last eef velocity
        self.recent_ee_vel_buffer = None  # RingBuffer holding prior 10 values of velocity values
        self.recent_ee_acc = None  # Current and last eef acceleration

        super().__init__(
            robot_type=robot_type,
            idn=idn,
            initial_qpos=initial_qpos,
            initialization_noise=initialization_noise,
        )

    def _load_controller(self):
        """
        Loads controller to be used for dynamic trajectories
        """
        # First, load the default controller if none is specified
        if not self.controller_config:
            # Need to update default for a single agent
            controller_path = os.path.join(
                os.path.dirname(__file__), '..',
                'controllers/config/{}.json'.format(
                    self.robot_model.default_controller_config))
            self.controller_config = load_controller_config(
                custom_fpath=controller_path)

        # Assert that the controller config is a dict file:
        #             NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY,
        #                                           OSC_POSITION, OSC_POSE, IK_POSE}
        assert type(self.controller_config) == dict, \
            "Inputted controller config must be a dict! Instead, got type: {}".format(type(self.controller_config))

        # Add to the controller dict additional relevant params:
        #   the robot name, mujoco sim, eef_name, joint_indexes, timestep (model) freq,
        #   policy (control) freq, and ndim (# joints)
        self.controller_config["robot_name"] = self.name
        self.controller_config["sim"] = self.sim
        self.controller_config["eef_name"] = self.gripper.visualization_sites[
            "grip_site"]
        self.controller_config["eef_rot_offset"] = self.eef_rot_offset
        self.controller_config["joint_indexes"] = {
            "joints": self.joint_indexes,
            "qpos": self._ref_joint_pos_indexes,
            "qvel": self._ref_joint_vel_indexes
        }
        self.controller_config["actuator_range"] = self.torque_limits
        self.controller_config["policy_freq"] = self.control_freq
        self.controller_config["ndim"] = len(self.robot_joints)

        # Instantiate the relevant controller
        self.controller = controller_factory(self.controller_config["type"],
                                             self.controller_config)

    def load_model(self):
        """
        Loads robot and optionally add grippers.
        """
        # First, run the superclass method to load the relevant model
        super().load_model()

        # Verify that the loaded model is of the correct type for this robot
        if self.robot_model.arm_type != "single":
            raise TypeError(
                "Error loading robot model: Incompatible arm type specified for this robot. "
                "Requested model arm type: {}, robot arm type: {}".format(
                    self.robot_model.arm_type, type(self)))

        # Now, load the gripper if necessary
        if self.has_gripper:
            if self.gripper_type == 'default':
                # Load the default gripper from the robot file
                self.gripper = gripper_factory(self.robot_model.gripper,
                                               idn=self.idn)
            else:
                # Load user-specified gripper
                self.gripper = gripper_factory(self.gripper_type, idn=self.idn)
        else:
            # Load null gripper
            self.gripper = gripper_factory(None, idn=self.idn)
        # Grab eef rotation offset
        self.eef_rot_offset = T.quat_multiply(
            self.robot_model.hand_rotation_offset,
            self.gripper.rotation_offset)
        # Use gripper visualization if necessary
        if not self.gripper_visualization:
            self.gripper.hide_visualization()
        self.robot_model.add_gripper(self.gripper)

    def reset(self, deterministic=False):
        """
        Sets initial pose of arm and grippers. Overrides gripper joint configuration if we're using a
        deterministic reset (e.g.: hard reset from xml file)

        Args:
            deterministic (bool): If true, will not randomize initializations within the sim
        """
        # First, run the superclass method to reset the position and controller
        super().reset(deterministic)

        if not deterministic:
            # Now, reset the gripper if necessary
            if self.has_gripper:
                self.sim.data.qpos[
                    self.
                    _ref_gripper_joint_pos_indexes] = self.gripper.init_qpos

        # Update base pos / ori references in controller
        self.controller.update_base_pose(self.base_pos, self.base_ori)

        # Setup buffers to hold recent values
        self.recent_qpos = DeltaBuffer(dim=len(self.joint_indexes))
        self.recent_actions = DeltaBuffer(dim=self.action_dim)
        self.recent_torques = DeltaBuffer(dim=len(self.joint_indexes))
        self.recent_ee_forcetorques = DeltaBuffer(dim=6)
        self.recent_ee_pose = DeltaBuffer(dim=7)
        self.recent_ee_vel = DeltaBuffer(dim=6)
        self.recent_ee_vel_buffer = RingBuffer(dim=6, length=10)
        self.recent_ee_acc = DeltaBuffer(dim=6)

    def setup_references(self):
        """
        Sets up necessary reference for robots, grippers, and objects.

        Note that this should get called during every reset from the environment
        """
        # First, run the superclass method to setup references for joint-related values / indexes
        super().setup_references()

        # Now, add references to gripper if necessary
        # indices for grippers in qpos, qvel
        if self.has_gripper:
            self.gripper_joints = list(self.gripper.joints)
            self._ref_gripper_joint_pos_indexes = [
                self.sim.model.get_joint_qpos_addr(x)
                for x in self.gripper_joints
            ]
            self._ref_gripper_joint_vel_indexes = [
                self.sim.model.get_joint_qvel_addr(x)
                for x in self.gripper_joints
            ]
            self._ref_joint_gripper_actuator_indexes = [
                self.sim.model.actuator_name2id(actuator)
                for actuator in self.gripper.actuators
            ]

        # IDs of sites for eef visualization
        self.eef_site_id = self.sim.model.site_name2id(
            self.gripper.visualization_sites["grip_site"])
        self.eef_cylinder_id = self.sim.model.site_name2id(
            self.gripper.visualization_sites["grip_cylinder"])

    def control(self, action, policy_step=False):
        """
        Actuate the robot with the
        passed joint velocities and gripper control.

        Args:
            action (np.array): The control to apply to the robot. The first @self.robot_model.dof dimensions should be
                the desired normalized joint velocities and if the robot has a gripper, the next @self.gripper.dof
                dimensions should be actuation controls for the gripper.
            policy_step (bool): Whether a new policy step (action) is being taken

        Raises:
            AssertionError: [Invalid action dimension]
        """

        # clip actions into valid range
        assert len(action) == self.action_dim, \
            "environment got invalid action dimension -- expected {}, got {}".format(
                self.action_dim, len(action))

        gripper_action = None
        if self.has_gripper:
            gripper_action = action[
                self.controller.
                control_dim:]  # all indexes past controller dimension indexes
            arm_action = action[:self.controller.control_dim]
        else:
            arm_action = action

        # Update the controller goal if this is a new policy step
        if policy_step:
            self.controller.set_goal(arm_action)

        # Now run the controller for a step
        torques = self.controller.run_controller()

        # Clip the torques
        low, high = self.torque_limits
        self.torques = np.clip(torques, low, high)

        # Get gripper action, if applicable
        if self.has_gripper:
            self.grip_action(gripper_action)

        # Apply joint torque control
        self.sim.data.ctrl[
            self._ref_joint_torq_actuator_indexes] = self.torques

        # If this is a policy step, also update buffers holding recent values of interest
        if policy_step:
            # Update proprioceptive values
            self.recent_qpos.push(self._joint_positions)
            self.recent_actions.push(action)
            self.recent_torques.push(self.torques)
            self.recent_ee_forcetorques.push(
                np.concatenate((self.ee_force, self.ee_torque)))
            self.recent_ee_pose.push(
                np.concatenate((self.controller.ee_pos,
                                T.mat2quat(self.controller.ee_ori_mat))))
            self.recent_ee_vel.push(
                np.concatenate(
                    (self.controller.ee_pos_vel, self.controller.ee_ori_vel)))

            # Estimation of eef acceleration (averaged derivative of recent velocities)
            self.recent_ee_vel_buffer.push(
                np.concatenate(
                    (self.controller.ee_pos_vel, self.controller.ee_ori_vel)))
            diffs = np.vstack([
                self.recent_ee_acc.current, self.control_freq *
                np.diff(self.recent_ee_vel_buffer.buf, axis=0)
            ])
            ee_acc = np.array([
                np.convolve(col, np.ones(10) / 10., mode='valid')[0]
                for col in diffs.transpose()
            ])
            self.recent_ee_acc.push(ee_acc)

    def grip_action(self, gripper_action):
        """
        Executes gripper @action for specified @arm

        Args:
            gripper_action (float): Value between [-1,1] to send to gripper
        """
        gripper_action_actual = self.gripper.format_action(gripper_action)
        # rescale normalized gripper action to control ranges
        ctrl_range = self.sim.model.actuator_ctrlrange[
            self._ref_joint_gripper_actuator_indexes]
        bias = 0.5 * (ctrl_range[:, 1] + ctrl_range[:, 0])
        weight = 0.5 * (ctrl_range[:, 1] - ctrl_range[:, 0])
        applied_gripper_action = bias + weight * gripper_action_actual
        self.sim.data.ctrl[
            self._ref_joint_gripper_actuator_indexes] = applied_gripper_action

    def visualize_gripper(self):
        """
        Visualizes the gripper site(s) if applicable.
        """
        if self.gripper_visualization:
            # By default, color the ball red
            self.sim.model.site_rgba[self.eef_site_id] = [1., 0., 0., 1.]

    def get_observations(self, di: OrderedDict):
        """
        Returns an OrderedDict containing robot observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

        Args:
            di (OrderedDict): Current set of observations from the environment

        Returns:
            OrderedDict: Augmented set of observations that include this robot's proprioceptive observations
        """
        # Get prefix from robot model to avoid naming clashes for multiple robots
        pf = self.robot_model.naming_prefix

        # proprioceptive features
        di[pf + "joint_pos"] = np.array(
            [self.sim.data.qpos[x] for x in self._ref_joint_pos_indexes])
        di[pf + "joint_vel"] = np.array(
            [self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes])

        robot_states = [
            np.sin(di[pf + "joint_pos"]),
            np.cos(di[pf + "joint_pos"]),
            di[pf + "joint_vel"],
        ]

        # Add in eef pos / qpos
        di[pf + "eef_pos"] = np.array(
            self.sim.data.site_xpos[self.eef_site_id])
        di[pf + "eef_quat"] = T.convert_quat(self.sim.data.get_body_xquat(
            self.robot_model.eef_name),
                                             to="xyzw")
        robot_states.extend([di[pf + "eef_pos"], di[pf + "eef_quat"]])

        # add in gripper information
        if self.has_gripper:
            di[pf + "gripper_qpos"] = np.array([
                self.sim.data.qpos[x]
                for x in self._ref_gripper_joint_pos_indexes
            ])
            di[pf + "gripper_qvel"] = np.array([
                self.sim.data.qvel[x]
                for x in self._ref_gripper_joint_vel_indexes
            ])
            robot_states.extend(
                [di[pf + "gripper_qpos"], di[pf + "gripper_qvel"]])

        di[pf + "robot-state"] = np.concatenate(robot_states)
        return di

    @property
    def action_limits(self):
        """
        Action lower/upper limits per dimension.

        Returns:
            2-tuple:

                - (np.array) minimum (low) action values
                - (np.array) maximum (high) action values
        """
        # Action limits based on controller limits
        low, high = ([-1] * self.gripper.dof,
                     [1] * self.gripper.dof) if self.has_gripper else ([], [])
        low_c, high_c = self.controller.control_limits
        low = np.concatenate([low_c, low])
        high = np.concatenate([high_c, high])

        return low, high

    @property
    def torque_limits(self):
        """
        Torque lower/upper limits per dimension.

        Returns:
            2-tuple:

                - (np.array) minimum (low) torque values
                - (np.array) maximum (high) torque values
        """
        # Torque limit values pulled from relevant robot.xml file
        low = self.sim.model.actuator_ctrlrange[
            self._ref_joint_torq_actuator_indexes, 0]
        high = self.sim.model.actuator_ctrlrange[
            self._ref_joint_torq_actuator_indexes, 1]

        return low, high

    @property
    def action_dim(self):
        """
        Action space dimension for this robot (controller dimension + gripper dof)

        Returns:
            int: action dimension
        """
        return self.controller.control_dim + self.gripper.dof if self.has_gripper else self.controller.control_dim

    @property
    def dof(self):
        """
        Returns:
            int: degrees of freedom of the robot (with grippers).
        """
        # Get the dof of the base robot model
        dof = super().dof
        if self.has_gripper:
            dof += self.gripper.dof
        return dof

    @property
    def js_energy(self):
        """
        Returns:
            np.array: the energy consumed by each joint between previous and current steps
        """
        # We assume in the motors torque is proportional to current (and voltage is constant)
        # In that case the amount of power scales proportional to the torque and the energy is the
        # time integral of that
        # Note that we use mean torque
        return np.abs((1.0 / self.control_freq) * self.recent_torques.average)

    @property
    def ee_ft_integral(self):
        """
        Returns:
            np.array: the integral over time of the applied ee force-torque
        """
        return np.abs(
            (1.0 / self.control_freq) * self.recent_ee_forcetorques.average)

    @property
    def ee_force(self):
        """
        Returns:
            np.array: force applied at the force sensor at the robot arm's eef
        """
        return self.get_sensor_measurement(self.gripper.sensors["force_ee"])

    @property
    def ee_torque(self):
        """
        Returns torque applied at the torque sensor at the robot arm's eef
        """
        return self.get_sensor_measurement(self.gripper.sensors["torque_ee"])

    @property
    def _right_hand_pose(self):
        """
        Returns:
            np.array: (4,4) array corresponding to the eef pose in base frame of robot.
        """
        return self.pose_in_base_from_name(self.robot_model.eef_name)

    @property
    def _right_hand_quat(self):
        """
        Returns:
            np.array: (x,y,z,w) eef quaternion in base frame of robot.
        """
        return T.mat2quat(self._right_hand_orn)

    @property
    def _right_hand_total_velocity(self):
        """
        Returns:
            np.array: 6-array representing the total eef velocity (linear + angular) in the base frame
        """

        # Use jacobian to translate joint velocities to end effector velocities.
        Jp = self.sim.data.get_body_jacp(self.robot_model.eef_name).reshape(
            (3, -1))
        Jp_joint = Jp[:, self._ref_joint_vel_indexes]

        Jr = self.sim.data.get_body_jacr(self.robot_model.eef_name).reshape(
            (3, -1))
        Jr_joint = Jr[:, self._ref_joint_vel_indexes]

        eef_lin_vel = Jp_joint.dot(self._joint_velocities)
        eef_rot_vel = Jr_joint.dot(self._joint_velocities)
        return np.concatenate([eef_lin_vel, eef_rot_vel])

    @property
    def _right_hand_pos(self):
        """
        Returns:
            np.array: 3-array representing the position of eef in base frame of robot.
        """
        eef_pose_in_base = self._right_hand_pose
        return eef_pose_in_base[:3, 3]

    @property
    def _right_hand_orn(self):
        """
        Returns:
            np.array: (3,3) array representing the orientation of eef in base frame of robot as a rotation matrix.
        """
        eef_pose_in_base = self._right_hand_pose
        return eef_pose_in_base[:3, :3]

    @property
    def _right_hand_vel(self):
        """
        Returns:
            np.array: (x,y,z) velocity of eef in base frame of robot.
        """
        return self._right_hand_total_velocity[:3]

    @property
    def _right_hand_ang_vel(self):
        """
        Returns:
            np.array: (ax,ay,az) angular velocity of eef in base frame of robot.
        """
        return self._right_hand_total_velocity[3:]
Ejemplo n.º 5
0
class Bimanual(Robot):
    """
    Initializes a bimanual robot simulation object.

    Args:
        robot_type (str): Specification for specific robot arm to be instantiated within this env (e.g: "Panda")

        idn (int or str): Unique ID of this robot. Should be different from others

        controller_config (dict or list of dict --> dict of dict): If set, contains relevant controller parameters
            for creating custom controllers. Else, uses the default controller for this specific task. Should either
            be single dict if same controller is to be used for both robot arms or else it should be a list of length 2.

            :NOTE: In the latter case, assumes convention of [right, left]

        initial_qpos (sequence of float): If set, determines the initial joint positions of the robot to be
            instantiated for the task

        initialization_noise (dict): Dict containing the initialization noise parameters. The expected keys and
            corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to "None" or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            :Note: Specifying None will automatically create the required dict with "magnitude" set to 0.0

        gripper_type (str or list of str --> dict): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default gripper associated
            within the 'robot' specification. None removes the gripper, and any other (valid) model overrides the
            default gripper. Should either be single str if same gripper type is to be used for both arms or else
            it should be a list of length 2

            :NOTE: In the latter case, assumes convention of [right, left]

        gripper_visualization (bool or list of bool --> dict): True if using gripper visualization.
            Useful for teleoperation. Should either be single bool if gripper visualization is to be used for both
            arms or else it should be a list of length 2

            :NOTE: In the latter case, assumes convention of [right, left]

        control_freq (float): how many control signals to receive
            in every second. This sets the amount of simulation time
            that passes between every action input.
    """
    def __init__(self,
                 robot_type: str,
                 idn=0,
                 controller_config=None,
                 initial_qpos=None,
                 initialization_noise=None,
                 gripper_type="default",
                 gripper_visualization=False,
                 control_freq=10):

        self.controller = self._input2dict(None)
        self.controller_config = self._input2dict(
            copy.deepcopy(controller_config))
        self.gripper = self._input2dict(None)
        self.gripper_type = self._input2dict(gripper_type)
        self.has_gripper = self._input2dict([
            gripper_type is not None
            for _, gripper_type in self.gripper_type.items()
        ])
        self.gripper_visualization = self._input2dict(gripper_visualization)
        self.control_freq = control_freq

        self.gripper_joints = self._input2dict(
            None)  # xml joint names for gripper
        self._ref_gripper_joint_pos_indexes = self._input2dict(
            None)  # xml gripper joint position indexes in mjsim
        self._ref_gripper_joint_vel_indexes = self._input2dict(
            None)  # xml gripper joint velocity indexes in mjsim
        self._ref_joint_gripper_actuator_indexes = self._input2dict(
            None)  # xml gripper (pos) actuator indexes for robot in mjsim
        self.eef_rot_offset = self._input2dict(
            None)  # rotation offsets from final arm link to gripper (quat)
        self.eef_site_id = self._input2dict(
            None)  # xml element id for eef in mjsim
        self.eef_cylinder_id = self._input2dict(
            None)  # xml element id for eef cylinder in mjsim
        self.torques = None  # Current torques being applied

        self.recent_qpos = None  # Current and last robot arm qpos
        self.recent_actions = None  # Current and last action applied
        self.recent_torques = None  # Current and last torques applied
        self.recent_ee_forcetorques = self._input2dict(
            None)  # Current and last forces / torques sensed at eef
        self.recent_ee_pose = self._input2dict(
            None)  # Current and last eef pose (pos + ori (quat))
        self.recent_ee_vel = self._input2dict(
            None)  # Current and last eef velocity
        self.recent_ee_vel_buffer = self._input2dict(
            None)  # RingBuffer holding prior 10 values of velocity values
        self.recent_ee_acc = self._input2dict(
            None)  # Current and last eef acceleration

        super().__init__(
            robot_type=robot_type,
            idn=idn,
            initial_qpos=initial_qpos,
            initialization_noise=initialization_noise,
        )

    def _load_controller(self):
        """
        Loads controller to be used for dynamic trajectories
        """
        # Flag for loading urdf once (only applicable for IK controllers)
        urdf_loaded = False

        # Load controller configs for both left and right arm
        for arm in self.arms:
            # First, load the default controller if none is specified
            if not self.controller_config[arm]:
                # Need to update default for a single agent
                controller_path = os.path.join(
                    os.path.dirname(__file__), '..',
                    'controllers/config/{}.json'.format(
                        self.robot_model.default_controller_config[arm]))
                self.controller_config[arm] = load_controller_config(
                    custom_fpath=controller_path)

            # Assert that the controller config is a dict file:
            #             NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY,
            #                                           OSC_POSITION, OSC_POSE, IK_POSE}
            assert type(self.controller_config[arm]) == dict, \
                "Inputted controller config must be a dict! Instead, got type: {}".format(
                    type(self.controller_config[arm]))

            # Add to the controller dict additional relevant params:
            #   the robot name, mujoco sim, eef_name, actuator_range, joint_indexes, timestep (model) freq,
            #   policy (control) freq, and ndim (# joints)
            self.controller_config[arm]["robot_name"] = self.name
            self.controller_config[arm]["sim"] = self.sim
            self.controller_config[arm]["eef_name"] = self.gripper[
                arm].visualization_sites["grip_site"]
            self.controller_config[arm][
                "eef_rot_offset"] = self.eef_rot_offset[arm]
            self.controller_config[arm]["ndim"] = self._joint_split_idx
            self.controller_config[arm]["policy_freq"] = self.control_freq
            (start, end) = (None,
                            self._joint_split_idx) if arm == "right" else (
                                self._joint_split_idx, None)
            self.controller_config[arm]["joint_indexes"] = {
                "joints": self.joint_indexes[start:end],
                "qpos": self._ref_joint_pos_indexes[start:end],
                "qvel": self._ref_joint_vel_indexes[start:end]
            }
            self.controller_config[arm]["actuator_range"] = (
                self.torque_limits[0][start:end],
                self.torque_limits[1][start:end])

            # Only load urdf the first time this controller gets called
            self.controller_config[arm][
                "load_urdf"] = True if not urdf_loaded else False
            urdf_loaded = True

            # Instantiate the relevant controller
            self.controller[arm] = controller_factory(
                self.controller_config[arm]["type"],
                self.controller_config[arm])

    def load_model(self):
        """
        Loads robot and optionally add grippers.
        """
        # First, run the superclass method to load the relevant model
        super().load_model()

        # Verify that the loaded model is of the correct type for this robot
        if self.robot_model.arm_type != "bimanual":
            raise TypeError(
                "Error loading robot model: Incompatible arm type specified for this robot. "
                "Requested model arm type: {}, robot arm type: {}".format(
                    self.robot_model.arm_type, type(self)))

        # Now, load the gripper if necessary
        for arm in self.arms:
            if self.has_gripper[arm]:
                if self.gripper_type[arm] == 'default':
                    # Load the default gripper from the robot file
                    self.gripper[arm] = gripper_factory(
                        self.robot_model.gripper[arm],
                        idn="_".join((str(self.idn), arm)))
                else:
                    # Load user-specified gripper
                    self.gripper[arm] = gripper_factory(self.gripper_type[arm],
                                                        idn="_".join(
                                                            (str(self.idn),
                                                             arm)))
            else:
                # Load null gripper
                self.gripper[arm] = gripper_factory(None,
                                                    idn="_".join(
                                                        (str(self.idn), arm)))
            # Grab eef rotation offset
            self.eef_rot_offset[arm] = T.quat_multiply(
                self.robot_model.hand_rotation_offset[arm],
                self.gripper[arm].rotation_offset)
            # Use gripper visualization if necessary
            if not self.gripper_visualization[arm]:
                self.gripper[arm].hide_visualization()
            self.robot_model.add_gripper(self.gripper[arm],
                                         self.robot_model.eef_name[arm])

    def reset(self, deterministic=False):
        """
        Sets initial pose of arm and grippers. Overrides gripper joint configuration if we're using a
        deterministic reset (e.g.: hard reset from xml file)

        Args:
            deterministic (bool): If true, will not randomize initializations within the sim
        """
        # First, run the superclass method to reset the position and controller
        super().reset(deterministic)

        if not deterministic:
            # Now, reset the gripper if necessary
            for arm in self.arms:
                if self.has_gripper[arm]:
                    self.sim.data.qpos[self._ref_gripper_joint_pos_indexes[
                        arm]] = self.gripper[arm].init_qpos

        # Setup buffers to hold recent values
        self.recent_qpos = DeltaBuffer(dim=len(self.joint_indexes))
        self.recent_actions = DeltaBuffer(dim=self.action_dim)
        self.recent_torques = DeltaBuffer(dim=len(self.joint_indexes))

        # Setup arm-specific values
        for arm in self.arms:
            # Update base pos / ori references in controller (technically only needs to be called once)
            self.controller[arm].update_base_pose(self.base_pos, self.base_ori)
            # Setup buffers for eef values
            self.recent_ee_forcetorques[arm] = DeltaBuffer(dim=6)
            self.recent_ee_pose[arm] = DeltaBuffer(dim=7)
            self.recent_ee_vel[arm] = DeltaBuffer(dim=6)
            self.recent_ee_vel_buffer[arm] = RingBuffer(dim=6, length=10)
            self.recent_ee_acc[arm] = DeltaBuffer(dim=6)

    def setup_references(self):
        """
        Sets up necessary reference for robots, grippers, and objects.

        Note that this should get called during every reset from the environment
        """
        # First, run the superclass method to setup references for joint-related values / indexes
        super().setup_references()

        # Now, add references to gripper if necessary
        # indices for grippers in qpos, qvel
        for arm in self.arms:
            if self.has_gripper[arm]:
                self.gripper_joints[arm] = list(self.gripper[arm].joints)
                self._ref_gripper_joint_pos_indexes[arm] = [
                    self.sim.model.get_joint_qpos_addr(x)
                    for x in self.gripper_joints[arm]
                ]
                self._ref_gripper_joint_vel_indexes[arm] = [
                    self.sim.model.get_joint_qvel_addr(x)
                    for x in self.gripper_joints[arm]
                ]
                self._ref_joint_gripper_actuator_indexes[arm] = [
                    self.sim.model.actuator_name2id(actuator)
                    for actuator in self.gripper[arm].actuators
                ]

            # IDs of sites for eef visualization
            self.eef_site_id[arm] = self.sim.model.site_name2id(
                self.gripper[arm].visualization_sites["grip_site"])
            self.eef_cylinder_id[arm] = self.sim.model.site_name2id(
                self.gripper[arm].visualization_sites["grip_cylinder"])

    def control(self, action, policy_step=False):
        """
        Actuate the robot with the
        passed joint velocities and gripper control.

        Args:
            action (np.array): The control to apply to the robot. The first @self.robot_model.dof dimensions should
                be the desired normalized joint velocities and if the robot has a gripper, the next @self.gripper.dof
                dimensions should be actuation controls for the gripper.

                :NOTE: Assumes inputted actions are of form:
                    [right_arm_control, right_gripper_control, left_arm_control, left_gripper_control]

            policy_step (bool): Whether a new policy step (action) is being taken

        Raises:
            AssertionError: [Invalid action dimension]
        """

        # clip actions into valid range
        assert len(action) == self.action_dim, \
            "environment got invalid action dimension -- expected {}, got {}".format(
                self.action_dim, len(action))

        self.torques = np.array([])
        # Now execute actions for each arm
        for arm in self.arms:
            # Make sure to split action space correctly
            (start, end) = (None,
                            self._action_split_idx) if arm == "right" else (
                                self._action_split_idx, None)
            sub_action = action[start:end]

            gripper_action = None
            if self.has_gripper[arm]:
                # get all indexes past controller dimension indexes
                gripper_action = sub_action[self.controller[arm].control_dim:]
                sub_action = sub_action[:self.controller[arm].control_dim]

            # Update the controller goal if this is a new policy step
            if policy_step:
                self.controller[arm].set_goal(sub_action)

            # Now run the controller for a step and add it to the torques
            self.torques = np.concatenate(
                (self.torques, self.controller[arm].run_controller()))

            # Get gripper action, if applicable
            if self.has_gripper[arm]:
                self.grip_action(gripper_action, arm)

        # Clip the torques
        low, high = self.torque_limits
        self.torques = np.clip(self.torques, low, high)

        # Apply joint torque control
        self.sim.data.ctrl[
            self._ref_joint_torq_actuator_indexes] = self.torques

        # If this is a policy step, also update buffers holding recent values of interest
        if policy_step:
            # Update proprioceptive values
            self.recent_qpos.push(self._joint_positions)
            self.recent_actions.push(action)
            self.recent_torques.push(self.torques)

            for arm in self.arms:
                # Update arm-specific proprioceptive values
                self.recent_ee_forcetorques[arm].push(
                    np.concatenate((self.ee_force[arm], self.ee_torque[arm])))
                self.recent_ee_pose[arm].push(
                    np.concatenate(
                        (self.controller[arm].ee_pos,
                         T.mat2quat(self.controller[arm].ee_ori_mat))))
                self.recent_ee_vel[arm].push(
                    np.concatenate((self.controller[arm].ee_pos_vel,
                                    self.controller[arm].ee_ori_vel)))

                # Estimation of eef acceleration (averaged derivative of recent velocities)
                self.recent_ee_vel_buffer[arm].push(
                    np.concatenate((self.controller[arm].ee_pos_vel,
                                    self.controller[arm].ee_ori_vel)))
                diffs = np.vstack([
                    self.recent_ee_acc[arm].current, self.control_freq *
                    np.diff(self.recent_ee_vel_buffer[arm].buf, axis=0)
                ])
                ee_acc = np.array([
                    np.convolve(col, np.ones(10) / 10., mode='valid')[0]
                    for col in diffs.transpose()
                ])
                self.recent_ee_acc[arm].push(ee_acc)

    def grip_action(self, gripper_action, arm):
        """
        Executes gripper @action for specified @arm

        Args:
            gripper_action (float): Value between [-1,1] to send to gripper
            arm (str): "left" or "right"; arm to execute action
        """
        gripper_action_actual = self.gripper[arm].format_action(gripper_action)
        # rescale normalized gripper action to control ranges
        ctrl_range = self.sim.model.actuator_ctrlrange[
            self._ref_joint_gripper_actuator_indexes[arm]]
        bias = 0.5 * (ctrl_range[:, 1] + ctrl_range[:, 0])
        weight = 0.5 * (ctrl_range[:, 1] - ctrl_range[:, 0])
        applied_gripper_action = bias + weight * gripper_action_actual
        self.sim.data.ctrl[self._ref_joint_gripper_actuator_indexes[
            arm]] = applied_gripper_action

    def visualize_gripper(self):
        """
        Visualizes the gripper site(s) if applicable.
        """
        for arm in self.arms:
            if self.gripper_visualization[arm]:
                # By default, color the ball red
                self.sim.model.site_rgba[self.eef_site_id[arm]] = [
                    1., 0., 0., 1.
                ]

    def get_observations(self, di: OrderedDict):
        """
        Returns an OrderedDict containing robot observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

        Args:
            di (OrderedDict): Current set of observations from the environment

        Returns:
            OrderedDict: Augmented set of observations that include this robot's proprioceptive observations
        """
        # Get prefix from robot model to avoid naming clashes for multiple robots
        pf = self.robot_model.naming_prefix

        # proprioceptive features
        di[pf + "joint_pos"] = np.array(
            [self.sim.data.qpos[x] for x in self._ref_joint_pos_indexes])
        di[pf + "joint_vel"] = np.array(
            [self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes])

        robot_states = [
            np.sin(di[pf + "joint_pos"]),
            np.cos(di[pf + "joint_pos"]),
            di[pf + "joint_vel"],
        ]

        for arm in self.arms:
            # Add in eef info
            di[pf + "_{}_".format(arm) + "eef_pos"] = np.array(
                self.sim.data.site_xpos[self.eef_site_id[arm]])
            di[pf + "_{}_".format(arm) + "eef_quat"] = T.convert_quat(
                self.sim.data.get_body_xquat(self.robot_model.eef_name[arm]),
                to="xyzw")
            robot_states.extend([
                di[pf + "_{}_".format(arm) + "eef_pos"],
                di[pf + "_{}_".format(arm) + "eef_quat"]
            ])

            # add in gripper information
            if self.has_gripper[arm]:
                di[pf + "_{}_".format(arm) + "gripper_qpos"] = np.array([
                    self.sim.data.qpos[x]
                    for x in self._ref_gripper_joint_pos_indexes[arm]
                ])
                di[pf + "_{}_".format(arm) + "gripper_qvel"] = np.array([
                    self.sim.data.qvel[x]
                    for x in self._ref_gripper_joint_vel_indexes[arm]
                ])
                robot_states.extend([
                    di[pf + "_{}_".format(arm) + "gripper_qpos"],
                    di[pf + "_{}_".format(arm) + "gripper_qvel"]
                ])

        di[pf + "robot-state"] = np.concatenate(robot_states)
        return di

    def _input2dict(self, inp):
        """
        Helper function that converts an input that is either a single value or a list into a dict with keys for
        each arm: "right", "left"

        Args:
            inp (str or list or None): Input value to be converted to dict

            :Note: If inp is a list, then assumes format is [right, left]

        Returns:
            dict: Inputs mapped for each robot arm
        """
        # First, convert to list if necessary
        if type(inp) is not list:
            inp = [inp for _ in range(2)]
        # Now, convert list to dict and return
        return {key: value for key, value in zip(self.arms, inp)}

    @property
    def arms(self):
        """
        Returns name of arms used as naming convention throughout this module

        Returns:
            2-tuple: ('right', 'left')
        """
        return "right", "left"

    @property
    def action_limits(self):
        """
        Action lower/upper limits per dimension.

        Returns:
            2-tuple:

                - (np.array) minimum (low) action values
                - (np.array) maximum (high) action values
        """
        # Action limits based on controller limits
        low, high = [], []
        for arm in self.arms:
            low_g, high_g = ([-1] * self.gripper[arm].dof, [1] * self.gripper[arm].dof) \
                if self.has_gripper[arm] else ([], [])
            low_c, high_c = self.controller[arm].control_limits
            low, high = np.concatenate([low, low_c, low_g]), \
                np.concatenate([high, high_c, high_g])
        return low, high

    @property
    def torque_limits(self):
        """
        Torque lower/upper limits per dimension.

        Returns:
            2-tuple:

                - (np.array) minimum (low) torque values
                - (np.array) maximum (high) torque values
        """
        # Torque limit values pulled from relevant robot.xml file
        low = self.sim.model.actuator_ctrlrange[
            self._ref_joint_torq_actuator_indexes, 0]
        high = self.sim.model.actuator_ctrlrange[
            self._ref_joint_torq_actuator_indexes, 1]

        return low, high

    @property
    def action_dim(self):
        """
        Action space dimension for this robot (controller dimension + gripper dof)

        Returns:
            int: action dimension
        """
        dim = 0
        for arm in self.arms:
            dim += self.controller[arm].control_dim + self.gripper[arm].dof if \
                self.has_gripper[arm] else self.controller[arm].control_dim
        return dim

    @property
    def dof(self):
        """
        Returns:
            int: degrees of freedom of the robot (with grippers).
        """
        # Get the dof of the base robot model
        dof = super().dof
        for arm in self.arms:
            if self.has_gripper[arm]:
                dof += self.gripper[arm].dof
        return dof

    @property
    def js_energy(self):
        """
        Returns:
            np.array: the energy consumed by each joint between previous and current steps
        """
        # We assume in the motors torque is proportional to current (and voltage is constant)
        # In that case the amount of power scales proportional to the torque and the energy is the
        # time integral of that
        # Note that we use mean torque
        return np.abs((1.0 / self.control_freq) * self.recent_torques.average)

    @property
    def ee_ft_integral(self):
        """
        Returns:
            dict: each arm-specific entry specifies the integral over time of the applied ee force-torque for that arm
        """
        vals = {}
        for arm in self.arms:
            vals[arm] = np.abs((1.0 / self.control_freq) *
                               self.recent_ee_forcetorques[arm].average)
        return vals

    @property
    def ee_force(self):
        """
        Returns:
            dict: each arm-specific entry specifies the force applied at the force sensor at the robot arm's eef
        """
        vals = {}
        for arm in self.arms:
            vals[arm] = self.get_sensor_measurement(
                self.gripper[arm].sensors["force_ee"])
        return vals

    @property
    def ee_torque(self):
        """
        Returns:
            dict: each arm-specific entry specifies the torque applied at the torque sensor at the robot arm's eef
        """
        vals = {}
        for arm in self.arms:
            vals[arm] = self.get_sensor_measurement(
                self.gripper[arm].sensors["torque_ee"])
        return vals

    @property
    def _hand_pose(self):
        """
        Returns:
            dict: each arm-specific entry specifies the eef pose in base frame of robot.
        """
        vals = {}
        for arm in self.arms:
            vals[arm] = self.pose_in_base_from_name(
                self.robot_model.eef_name[arm])
        return vals

    @property
    def _hand_quat(self):
        """
        Returns:
            dict: each arm-specific entry specifies the eef quaternion in base frame of robot.
        """
        vals = {}
        orns = self._hand_orn
        for arm in self.arms:
            vals[arm] = T.mat2quat(orns[arm])
        return vals

    @property
    def _hand_total_velocity(self):
        """
        Returns:
            dict: each arm-specific entry specifies the total eef velocity (linear + angular) in the base frame
            as a numpy array of shape (6,)
        """
        vals = {}
        for arm in self.arms:
            # Determine correct start, end points based on arm
            (start, end) = (None,
                            self._joint_split_idx) if arm == "right" else (
                                self._joint_split_idx, None)

            # Use jacobian to translate joint velocities to end effector velocities.
            Jp = self.sim.data.get_body_jacp(
                self.robot_model.eef_name[arm]).reshape((3, -1))
            Jp_joint = Jp[:, self._ref_joint_vel_indexes[start:end]]

            Jr = self.sim.data.get_body_jacr(
                self.robot_model.eef_name[arm]).reshape((3, -1))
            Jr_joint = Jr[:, self._ref_joint_vel_indexes[start:end]]

            eef_lin_vel = Jp_joint.dot(self._joint_velocities)
            eef_rot_vel = Jr_joint.dot(self._joint_velocities)
            vals[arm] = np.concatenate([eef_lin_vel, eef_rot_vel])
        return vals

    @property
    def _hand_pos(self):
        """
        Returns:
            dict: each arm-specific entry specifies the position of eef in base frame of robot.
        """
        vals = {}
        poses = self._hand_pose
        for arm in self.arms:
            eef_pose_in_base = poses[arm]
            vals[arm] = eef_pose_in_base[:3, 3]
        return vals

    @property
    def _hand_orn(self):
        """
        Returns:
            dict: each arm-specific entry specifies the orientation of eef in base frame of robot as a rotation matrix.
        """
        vals = {}
        poses = self._hand_pose
        for arm in self.arms:
            eef_pose_in_base = poses[arm]
            vals[arm] = eef_pose_in_base[:3, :3]
        return vals

    @property
    def _hand_vel(self):
        """
        Returns:
            dict: each arm-specific entry specifies the velocity of eef in base frame of robot.
        """
        vels = self._hand_total_velocity
        for arm in self.arms:
            vels[arm] = vels[arm][:3]
        return vels

    @property
    def _hand_ang_vel(self):
        """
        Returns:
            dict: each arm-specific entry specifies the angular velocity of eef in base frame of robot.
        """
        vels = self._hand_total_velocity
        for arm in self.arms:
            vels[arm] = vels[arm][3:]
        return vels

    @property
    def _action_split_idx(self):
        """
        Grabs the index that correctly splits the right arm from the left arm actions

        :NOTE: Assumes inputted actions are of form:
            [right_arm_control, right_gripper_control, left_arm_control, left_gripper_control]

        Returns:
            int: Index splitting right from left arm actions
        """
        return self.controller["right"].control_dim + self.gripper["right"].dof if self.has_gripper["right"] \
            else self.controller["right"].control_dim

    @property
    def _joint_split_idx(self):
        """
        Returns:
            int: the index that correctly splits the right arm from the left arm joints
        """
        return int(len(self.robot_joints) / 2)
Ejemplo n.º 6
0
class Quadruped(Robot):
    """
    Initializes a single-armed robot simulation object.

    Args:
        robot_type (str): Specification for specific robot arm to be instantiated within this env (e.g: "Panda")

        idn (int or str): Unique ID of this robot. Should be different from others

        controller_config (dict): If set, contains relevant controller parameters for creating a custom controller.
            Else, uses the default controller for this specific task

        initial_qpos (sequence of float): If set, determines the initial joint positions of the robot to be
            instantiated for the task

        initialization_noise (dict): Dict containing the initialization noise parameters. The expected keys and
            corresponding value types are specified below:

            :`'magnitude'`: The scale factor of uni-variate random noise applied to each of a robot's given initial
                joint positions. Setting this value to "None" or 0.0 results in no noise being applied.
                If "gaussian" type of noise is applied then this magnitude scales the standard deviation applied,
                If "uniform" type of noise is applied then this magnitude sets the bounds of the sampling range
            :`'type'`: Type of noise to apply. Can either specify "gaussian" or "uniform"

            :Note: Specifying None will automatically create the required dict with "magnitude" set to 0.0

        gripper_type (str): type of gripper, used to instantiate
            gripper models from gripper factory. Default is "default", which is the default gripper associated
            within the 'robot' specification. None removes the gripper, and any other (valid) model overrides the
            default gripper

        gripper_visualization (bool): True if using gripper visualization.
            Useful for teleoperation.

        control_freq (float): how many control signals to receive
            in every second. This sets the amount of simulation time
            that passes between every action input.
    """
    def __init__(self,
                 robot_type: str,
                 idn=0,
                 controller_config=None,
                 initial_qpos=None,
                 initialization_noise=None,
                 control_freq=10):

        self.controller = None
        self.controller_config = copy.deepcopy(controller_config)
        self.has_gripper = False
        self.control_freq = control_freq

        self.torques = None  # Current torques being applied

        self._ref_chassis_free_joint_index = None  # xml joint indexes for robot in mjsim
        self._ref_chassis_pos_indexes = None  # xml joint position indexes in mjsim
        self._ref_chassis_vel_indexes = None  # xml joint velocity indexes in mjsim

        self.recent_qpos = None  # Current and last robot arm qpos
        self.recent_actions = None  # Current and last action applied
        self.recent_torques = None  # Current and last torques applied

        super().__init__(
            robot_type=robot_type,
            idn=idn,
            initial_qpos=initial_qpos,
            initialization_noise=initialization_noise,
        )

    def _load_controller(self):
        """
        Loads controller to be used for dynamic trajectories
        """
        # First, load the default controller if none is specified
        if not self.controller_config:
            # Need to update default for a single agent
            controller_path = os.path.join(
                os.path.dirname(__file__), '..',
                'controllers/config/{}.json'.format(
                    self.robot_model.default_controller_config))
            self.controller_config = load_controller_config(
                custom_fpath=controller_path)

        # Assert that the controller config is a dict file:
        #             NOTE: "type" must be one of: {JOINT_POSITION, JOINT_TORQUE, JOINT_VELOCITY,
        #                                           OSC_POSITION, OSC_POSE, IK_POSE}
        assert type(self.controller_config) == dict, \
            "Inputted controller config must be a dict! Instead, got type: {}".format(type(self.controller_config))
        assert self.controller_config["type"] == "LOCOMOTION_JOINT_TORQUE", \
            "Only JOINT_TORQUE controllers are currently supported for quadruped robots, please change the " \
            "controller to be a joint torque controller. Got controller type: {}".format(self.controller_config["type"])

        # Add to the controller dict additional relevant params:
        #   the robot name, mujoco sim, eef_name, joint_indexes, timestep (model) freq,
        #   policy (control) freq, and ndim (# joints)
        self.controller_config["robot_name"] = self.name
        self.controller_config["sim"] = self.sim
        self.controller_config["chassis_name"] = self.robot_model.robot_base
        self.controller_config["joint_indexes"] = {
            "joints": self.joint_indexes,
            "qpos": self._ref_joint_pos_indexes,
            "qvel": self._ref_joint_vel_indexes
        }
        self.controller_config["actuator_range"] = self.torque_limits
        self.controller_config["policy_freq"] = self.control_freq
        self.controller_config["ndim"] = len(self.robot_joints)

        # Instantiate the relevant controller
        self.controller = controller_factory(self.controller_config["type"],
                                             self.controller_config)

    def load_model(self):
        """
        Loads robot and optionally add grippers.
        """
        # First, run the superclass method to load the relevant model
        super().load_model()

        # Verify that the loaded model is of the correct type for this robot
        if self.robot_model.arm_type != "none":
            raise TypeError(
                "Error loading robot model: Incompatible arm type specified for this robot. "
                "Requested model arm type: {}, robot arm type: {}".format(
                    self.robot_model.arm_type, type(self)))

    def reset(self, deterministic=False):
        """
        Sets initial pose of arm and grippers. Overrides gripper joint configuration if we're using a
        deterministic reset (e.g.: hard reset from xml file)

        Args:
            deterministic (bool): If true, will not randomize initializations within the sim
        """
        # First, run the superclass method to reset the position and controller
        super().reset(deterministic)

        # Update base pos / ori references in controller
        self.controller.update_base_pose(self.base_pos, self.base_ori)

        # Setup buffers to hold recent values
        self.recent_qpos = DeltaBuffer(dim=len(self.joint_indexes))
        self.recent_actions = DeltaBuffer(dim=self.action_dim)
        self.recent_torques = DeltaBuffer(dim=len(self.joint_indexes))

    def setup_references(self):
        """
        Sets up necessary reference for robots, grippers, and objects.

        Note that this should get called during every reset from the environment
        """
        # First, run the superclass method to setup references for joint-related values / indexes
        super().setup_references()

        # Now, add references to chassis
        # indices for chassisS in qpos, qvel
        self._ref_chassis_free_joint_index = self.sim.model.joint_name2id(
            self.robot_model.robot_base_free_joint)

        (chassis_free_joint_addr_start, chassis_free_joint_addr_end) = \
            self.sim.model.get_joint_qpos_addr(self.robot_model.robot_base_free_joint)
        self._ref_chassis_pos_indexes = [
            x for x in range(chassis_free_joint_addr_start,
                             chassis_free_joint_addr_end)
        ]
        (chassis_free_joint_addr_start, chassis_free_joint_addr_end) = \
            self.sim.model.get_joint_qvel_addr(self.robot_model.robot_base_free_joint)
        #print("CHASSIS:" + str(self._ref_chassis_pos_indexes))
        self._ref_chassis_vel_indexes = [
            x for x in range(chassis_free_joint_addr_start,
                             chassis_free_joint_addr_end)
        ]

        #print("CHASSIS:" + str(self._ref_chassis_vel_indexes))

    def control(self, action, policy_step=False):
        """
        Actuate the robot with the
        passed joint velocities and gripper control.

        Args:
            action (np.array): The control to apply to the robot. The first @self.robot_model.dof dimensions should be
                the desired normalized joint velocities and if the robot has a gripper, the next @self.gripper.dof
                dimensions should be actuation controls for the gripper.
            policy_step (bool): Whether a new policy step (action) is being taken

        Raises:
            AssertionError: [Invalid action dimension]
        """

        # clip actions into valid range
        assert len(action) == self.action_dim, \
            "environment got invalid action dimension -- expected {}, got {}".format(
                self.action_dim, len(action))

        # Update the controller goal if this is a new policy step
        if policy_step:
            self.controller.set_goal(action)

        # Now run the controller for a step
        torques = self.controller.run_controller()

        # Clip the torques
        low, high = self.torque_limits
        self.torques = np.clip(torques, low, high)

        # Apply joint torque control
        self.sim.data.ctrl[
            self._ref_joint_torq_actuator_indexes] = self.torques

        # If this is a policy step, also update buffers holding recent values of interest
        if policy_step:
            # Update proprioceptive values
            self.recent_qpos.push(self._joint_positions)
            self.recent_actions.push(action)
            self.recent_torques.push(self.torques)

    def visualize_gripper(self):
        """
        Visualizes the gripper site(s) if applicable.
        """
        pass

    def get_observations(self, di: OrderedDict):
        """
        Returns an OrderedDict containing robot observations [(name_string, np.array), ...].

        Important keys:

            `'robot-state'`: contains robot-centric information.

        Args:
            di (OrderedDict): Current set of observations from the environment

        Returns:
            OrderedDict: Augmented set of observations that include this robot's proprioceptive observations
        """

        # Get prefix from robot model to avoid naming clashes for multiple robots
        pf = self.robot_model.naming_prefix

        # proprioceptive features
        di[pf + "chassis_pos"] = np.array(
            [self.sim.data.qpos[x] for x in self._ref_chassis_pos_indexes])

        di[pf + "chassis_vel"] = np.array(
            [self.sim.data.qvel[x] for x in self._ref_chassis_vel_indexes])

        di[pf + "joint_pos"] = np.array(
            [self.sim.data.qpos[x] for x in self._ref_joint_pos_indexes])
        di[pf + "joint_vel"] = np.array(
            [self.sim.data.qvel[x] for x in self._ref_joint_vel_indexes])

        robot_states = [
            di[pf + "chassis_pos"],
            di[pf + "chassis_vel"],
            np.sin(di[pf + "joint_pos"]),
            np.cos(di[pf + "joint_pos"]),
            di[pf + "joint_vel"],
        ]

        # Add in eef pos / qpos
        '''di[pf + "eef_pos"] = np.array(self.sim.data.site_xpos[self.eef_site_id])
        di[pf + "eef_quat"] = T.convert_quat(
            self.sim.data.get_body_xquat(self.robot_model.eef_name), to="xyzw"
        )
        robot_states.extend([di[pf + "eef_pos"], di[pf + "eef_quat"]])
        '''

        di[pf + "robot-state"] = np.concatenate(robot_states)
        return di

    @property
    def base_dof(self):
        return self.robot_model.base_dof

    @property
    def action_limits(self):
        """
        Action lower/upper limits per dimension.

        Returns:
            2-tuple:

                - (np.array) minimum (low) action values
                - (np.array) maximum (high) action values
        """
        # Action limits based on controller limits
        low_c, high_c = self.controller.control_limits
        return low_c, high_c

    @property
    def torque_limits(self):
        """
        Torque lower/upper limits per dimension.

        Returns:
            2-tuple:

                - (np.array) minimum (low) torque values
                - (np.array) maximum (high) torque values
        """
        # Torque limit values pulled from relevant robot.xml file
        low = self.sim.model.actuator_ctrlrange[
            self._ref_joint_torq_actuator_indexes, 0]
        high = self.sim.model.actuator_ctrlrange[
            self._ref_joint_torq_actuator_indexes, 1]

        return low, high

    @property
    def action_dim(self):
        """
        Action space dimension for this robot (controller dimension + gripper dof)

        Returns:
            int: action dimension
        """
        return self.controller.control_dim

    @property
    def dof(self):
        """
        Returns:
            int: degrees of freedom of the robot (with grippers).
        """
        # Get the dof of the base robot model
        dof = super().dof
        return dof

    @property
    def js_energy(self):
        """
        Returns:
            np.array: the energy consumed by each joint between previous and current steps
        """
        # We assume in the motors torque is proportional to current (and voltage is constant)
        # In that case the amount of power scales proportional to the torque and the energy is the
        # time integral of that
        # Note that we use mean torque
        return np.abs((1.0 / self.control_freq) * self.recent_torques.average)