Ejemplo n.º 1
0
class WAMSim(MujocoSimEnv, Serializable):
    """
    WAM Arm from Barrett technologies.

    .. note::
        When using the `reset()` function, always pass a meaningful `init_state`

    .. seealso::
        https://github.com/jhu-lcsr/barrett_model
        http://www.mujoco.org/book/XMLreference.html (e.g. for joint damping)
    """

    name: str = 'wam'

    def __init__(self,
                 frame_skip: int = 1,
                 max_steps: int = pyrado.inf,
                 task_args: [dict, None] = None):
        """
        Constructor

        :param frame_skip: number of frames for holding the same action, i.e. multiplier of the time step size
        :param max_steps: max number of simulation time steps
        :param task_args: arguments for the task construction
        """
        model_path = osp.join(pyrado.MUJOCO_ASSETS_DIR, 'wam_7dof_base.xml')
        super().__init__(model_path, frame_skip, max_steps, task_args)

        self.camera_config = dict(
            trackbodyid=0,  # id of the body to track
            elevation=-30,  # camera rotation around the axis in the plane
            azimuth=-90  # camera rotation around the camera's vertical axis
        )

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        return dict()

    def _create_spaces(self):
        # Action space
        max_act = np.array([150., 125., 40., 60., 5., 5., 2.])
        self._act_space = BoxSpace(-max_act, max_act)

        # State space
        state_shape = np.concatenate([self.sim.data.qpos,
                                      self.sim.data.qvel]).shape
        max_state = np.full(state_shape, pyrado.inf)
        self._state_space = BoxSpace(-max_state, max_state)

        # Initial state space
        self._init_space = self._state_space.copy()

        # Observation space
        obs_shape = self.observe(max_state).shape
        max_obs = np.full(obs_shape, pyrado.inf)
        self._obs_space = BoxSpace(-max_obs, max_obs)

    def _create_task(self, task_args: dict = None) -> Task:
        state_des = np.concatenate(
            [self.init_qpos.copy(),
             self.init_qvel.copy()])
        return DesStateTask(self.spec, state_des, ZeroPerStepRewFcn())

    def _mujoco_step(self, act: np.ndarray) -> dict:
        self.sim.data.qfrc_applied[:] = act
        self.sim.step()

        qpos, qvel = self.sim.data.qpos.copy(), self.sim.data.qvel.copy()
        self.state = np.concatenate([qpos, qvel])
        return dict()
Ejemplo n.º 2
0
class QBallBalancerSim(SimPyEnv, Serializable):
    """
    Environment in which a ball rolls on an actuated plate. The ball is randomly initialized on the plate and is to be
    stabilized on the center of the plate. The problem formulation treats this setup as 2 independent ball-on-beam
    problems. The plate is actuated via 2 servo motors that lift the plate.

    .. note::
        The dynamics are not the same as in the Quanser Workbook (2 DoF Ball-Balancer - Instructor). Here, we added
        the coriolis forces and linear-viscous friction. However, the 2 dim system is still modeled to be decoupled.
        This is the case, since the two rods (connected to the servos) are pushing the plate at the center lines.
        As a result, the angles alpha and beta are w.r.t. to the inertial frame, i.e. they are not 2 sequential rations.
    """

    name: str = 'qbb'

    def __init__(self,
                 dt: float,
                 max_steps: int = pyrado.inf,
                 task_args: [dict, None] = None,
                 simplified_dyn: bool = False,
                 load_experimental_tholds: bool = True):
        """
        Constructor

        :param dt: simulation step size [s]
        :param max_steps: maximum number of simulation steps
        :param task_args: arguments for the task construction
        :param simplified_dyn: use a dynamics model without Coriolis forces and without friction
        :param load_experimental_tholds: use the voltage thresholds determined from experiments
        """
        Serializable._init(self, locals())

        self._simplified_dyn = simplified_dyn
        self.plate_angs = np.zeros(
            2
        )  # plate's angles alpha and beta [rad] (unused for simplified_dyn = True)

        # Call SimPyEnv's constructor
        super().__init__(dt, max_steps, task_args)

        if not simplified_dyn:
            self._kin = QBallBalancerKin(self)

    def _create_spaces(self):
        l_plate = self.domain_param['l_plate']

        # Define the spaces
        max_state = np.array([
            np.pi / 4.,
            np.pi / 4.,
            l_plate / 2.,
            l_plate / 2.,  # [rad, rad, m, m, ...
            5 * np.pi,
            5 * np.pi,
            0.5,
            0.5
        ])  # ... rad/s, rad/s, m/s, m/s]
        min_init_state = np.array([
            0.75 * l_plate / 2, -np.pi, -0.05 * max_state[6],
            -0.05 * max_state[7]
        ])
        max_init_state = np.array([
            0.8 * l_plate / 2, np.pi, 0.05 * max_state[6], 0.05 * max_state[7]
        ])
        max_act = np.array([3.0, 3.0])  # [V]

        self._state_space = BoxSpace(-max_state,
                                     max_state,
                                     labels=[
                                         r'$\theta_x$', r'$\theta_y$', '$x$',
                                         '$y$', r'$\dot{\theta}_x$',
                                         r'$\dot{\theta}_y$', r'$\dot{x}$',
                                         r'$\dot{y}$'
                                     ])
        self._obs_space = self._state_space.copy()
        self._init_space = Polar2DPosVelSpace(
            min_init_state,
            max_init_state,
            labels=['$r$', r'$\phi$', '$\dot{x}$', '$\dot{y}$'])
        self._act_space = BoxSpace(-max_act,
                                   max_act,
                                   labels=['$V_x$', '$V_y$'])

        self._curr_act = np.zeros_like(
            max_act)  # just for usage in render function

    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get('state_des', np.zeros(8))
        Q = task_args.get(
            'Q', np.diag([1e0, 1e0, 5e3, 5e3, 1e-2, 1e-2, 5e-1, 5e-1]))
        R = task_args.get('R', np.diag([1e-2, 1e-2]))
        # Q = np.diag([1e2, 1e2, 5e2, 5e2, 1e-2, 1e-2, 1e+1, 1e+1])  # for LQR
        # R = np.diag([1e-2, 1e-2])  # for LQR

        return DesStateTask(
            self.spec, state_des,
            ScaledExpQuadrErrRewFcn(Q,
                                    R,
                                    self.state_space,
                                    self.act_space,
                                    min_rew=1e-4))

    # Cache measured thresholds during one run and reduce console log spam that way
    measured_tholds = None

    @classmethod
    def get_V_tholds(cls, load_experiments: bool = True) -> dict:
        """ If available, the voltage thresholds computed from measurements, else use default values. """
        # Hard-coded default thresholds
        tholds = dict(V_thold_x_pos=0.28,
                      V_thold_x_neg=-0.10,
                      V_thold_y_pos=0.28,
                      V_thold_y_neg=-0.074)

        if load_experiments:
            if cls.measured_tholds is None:
                ex_dir = osp.join(pyrado.EVAL_DIR, 'volt_thold_qbb')
                if osp.exists(ex_dir) and osp.isdir(ex_dir) and os.listdir(
                        ex_dir):
                    print_cbt('Found measured thresholds, using the averages.',
                              'g')
                    # Calculate cumulative running average
                    cma = np.zeros((2, 2))
                    i = 0.
                    for f in os.listdir(ex_dir):
                        if f.endswith('.npy'):
                            i += 1.
                            cma = cma + (np.load(osp.join(ex_dir, f)) -
                                         cma) / i
                    tholds['V_thold_x_pos'] = cma[0, 1]
                    tholds['V_thold_x_neg'] = cma[0, 0]
                    tholds['V_thold_y_pos'] = cma[1, 1]
                    tholds['V_thold_y_neg'] = cma[1, 0]
                else:
                    print_cbt(
                        'No measured thresholds found, falling back to default values.',
                        'y')

                # Cache results for future calls
                cls.measured_tholds = tholds
            else:
                tholds = cls.measured_tholds

        return tholds

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        V_tholds = cls.get_V_tholds()
        return dict(
            g=9.81,  # gravity constant [m/s**2]
            m_ball=0.003,  # mass of the ball [kg]
            r_ball=0.019625,  # radius of the ball [m]
            l_plate=0.275,  # length of the (square) plate [m]
            r_arm=
            0.0254,  # distance between the servo output gear shaft and the coupled joint [m]
            K_g=70.,  # gear ratio [-]
            eta_g=0.9,  # gearbox efficiency [-]
            J_l=5.2822e-5,  # load moment of inertia [kg*m**2]
            J_m=4.6063e-7,  # motor moment of inertia [kg*m**2]
            k_m=
            0.0077,  # motor torque constant [N*m/A] = back-EMF constant [V*s/rad]
            R_m=2.6,  # motor armature resistance
            eta_m=0.69,  # motor efficiency [-]
            B_eq=
            0.015,  # equivalent viscous damping coefficient w.r.t. load [N*m*s/rad]
            c_frict=0.05,  # viscous friction coefficient [N*s/m]
            V_thold_x_pos=V_tholds[
                'V_thold_x_pos'],  # voltage required to move the x servo in positive dir
            V_thold_x_neg=V_tholds[
                'V_thold_x_neg'],  # voltage required to move the x servo in negative dir
            V_thold_y_pos=V_tholds[
                'V_thold_y_pos'],  # voltage required to move the y servo in positive dir
            V_thold_y_neg=V_tholds[
                'V_thold_y_neg'],  # voltage required to move the y servo in negative dir
            offset_th_x=0.,  # angular offset of the x axis motor shaft [rad]
            offset_th_y=0.)  # angular offset of the y axis motor shaft [rad]

    def _calc_constants(self):
        l_plate = self.domain_param['l_plate']
        m_ball = self.domain_param['m_ball']
        r_ball = self.domain_param['r_ball']
        eta_g = self.domain_param['eta_g']
        eta_m = self.domain_param['eta_m']
        K_g = self.domain_param['K_g']
        J_m = self.domain_param['J_m']
        J_l = self.domain_param['J_l']
        r_arm = self.domain_param['r_arm']
        k_m = self.domain_param['k_m']
        R_m = self.domain_param['R_m']
        B_eq = self.domain_param['B_eq']

        self.J_ball = 2. / 5 * m_ball * r_ball**2  # inertia of the ball [kg*m**2]
        self.J_eq = eta_g * K_g**2 * J_m + J_l  # equivalent moment of inertia [kg*m**2]
        self.c_kin = 2. * r_arm / l_plate  # coefficient for the rod-plate kinematic
        self.A_m = eta_g * K_g * eta_m * k_m / R_m
        self.B_eq_v = eta_g * K_g**2 * eta_m * k_m**2 / R_m + B_eq
        self.zeta = m_ball * r_ball**2 + self.J_ball  # combined moment of inertial for the ball

    def _state_from_init(self, init_state):
        state = np.zeros(8)
        state[2:4] = init_state[:2]
        state[6:8] = init_state[2:]
        return state

    def reset(self, init_state=None, domain_param=None):
        obs = super().reset(init_state=init_state, domain_param=domain_param)

        # Reset the plate angles
        if self._simplified_dyn:
            self.plate_angs = np.zeros(
                2)  # actually not necessary since not used
        else:
            offset_th_x = self.domain_param['offset_th_x']
            offset_th_y = self.domain_param['offset_th_y']
            # Get the plate angles from inverse kinematics for initial pose
            self.plate_angs[0] = self._kin(self.state[0] + offset_th_x)
            self.plate_angs[1] = self._kin(self.state[1] + offset_th_y)

        # Return perfect observation
        return obs

    def _step_dynamics(self, act: np.ndarray):
        g = self.domain_param['g']
        m_ball = self.domain_param['m_ball']
        r_ball = self.domain_param['r_ball']
        c_frict = self.domain_param['c_frict']
        V_thold_x_neg = self.domain_param['V_thold_x_neg']
        V_thold_x_pos = self.domain_param['V_thold_x_pos']
        V_thold_y_neg = self.domain_param['V_thold_y_neg']
        V_thold_y_pos = self.domain_param['V_thold_y_pos']
        offset_th_x = self.domain_param['offset_th_x']
        offset_th_y = self.domain_param['offset_th_y']

        if not self._simplified_dyn:
            # Apply a voltage dead zone (i.e. below a certain amplitude the system does not move). This is a very
            # simple model of static friction. Experimentally evaluated the voltage required to get the plate moving.
            if V_thold_x_neg <= act[0] <= V_thold_x_pos:
                act[0] = 0
            if V_thold_y_neg <= act[1] <= V_thold_y_pos:
                act[1] = 0

        # State
        th_x = self.state[0] + offset_th_x  # angle of the x axis servo (load)
        th_y = self.state[1] + offset_th_y  # angle of the y axis servo (load)
        x = self.state[2]  # ball position along the x axis
        y = self.state[3]  # ball position along the y axis
        th_x_dot = self.state[4]  # angular velocity of the x axis servo (load)
        th_y_dot = self.state[5]  # angular velocity of the y axis servo (load)
        x_dot = self.state[6]  # ball velocity along the x axis
        y_dot = self.state[7]  # ball velocity along the y axis

        th_x_ddot = (self.A_m * act[0] - self.B_eq_v * th_x_dot) / self.J_eq
        th_y_ddot = (self.A_m * act[1] - self.B_eq_v * th_y_dot) / self.J_eq
        '''
        THIS IS TIME INTENSIVE
        if not self._simplified_dyn:
            # Get the plate angles from inverse kinematics
            self.plate_angs[0] = self._kin(self.state[0] + self.offset_th_x)
            self.plate_angs[1] = self._kin(self.state[1] + self.offset_th_y)
        '''

        # Plate (not part of the state since it is a redundant information)
        # The definition of th_y is opposing beta, i.e.
        a = self.plate_angs[0]  # plate's angle around the y axis (alpha)
        b = self.plate_angs[1]  # plate's angle around the x axis (beta)
        a_dot = self.c_kin * th_x_dot * np.cos(th_x) / np.cos(
            a)  # plate's angular velocity around the y axis (alpha)
        b_dot = self.c_kin * -th_y_dot * np.cos(-th_y) / np.cos(
            b)  # plate's angular velocity around the x axis (beta)
        # Plate's angular accelerations (unused for simplified_dyn = True)
        a_ddot = 1. / np.cos(a) * (
            self.c_kin *
            (th_x_ddot * np.cos(th_x) - th_x_dot**2 * np.sin(th_x)) +
            a_dot**2 * np.sin(a))
        b_ddot = 1. / np.cos(b) * (self.c_kin *
                                   (-th_y_ddot * np.cos(th_y) -
                                    (-th_y_dot)**2 * np.sin(-th_y)) +
                                   b_dot**2 * np.sin(b))

        # kinematics: sin(a) = self.c_kin * sin(th_x)
        if self._simplified_dyn:
            # Ball dynamic without friction and Coriolis forces
            x_ddot = self.c_kin * m_ball * g * r_ball**2 * np.sin(
                th_x) / self.zeta  # symm inertia
            y_ddot = self.c_kin * m_ball * g * r_ball**2 * np.sin(
                th_y) / self.zeta  # symm inertia
        else:
            # Ball dynamic with friction and Coriolis forces
            x_ddot = (
                -c_frict * x_dot * r_ball**2  # friction
                - self.J_ball * r_ball * a_ddot  # plate influence
                + m_ball * x * a_dot**2 * r_ball**2  # centripetal
                + self.c_kin * m_ball * g * r_ball**2 * np.sin(th_x)  # gravity
            ) / self.zeta
            y_ddot = (
                -c_frict * y_dot * r_ball**2  # friction
                - self.J_ball * r_ball * b_ddot  # plate influence
                + m_ball * y * (-b_dot)**2 * r_ball**2  # centripetal
                + self.c_kin * m_ball * g * r_ball**2 * np.sin(th_y)  # gravity
            ) / self.zeta

        # Integration step (symplectic Euler)
        self.state[4:] += np.array([th_x_ddot, th_y_ddot, x_ddot, y_ddot
                                    ]) * self._dt  # next velocity
        self.state[:4] += self.state[4:] * self._dt  # next position

        # Integration step (forward Euler)
        self.plate_angs += np.array([
            a_dot, b_dot
        ]) * self._dt  # just for debugging when simplified dynamics

    def _init_anim(self):
        import vpython as vp

        l_plate = self.domain_param['l_plate']
        m_ball = self.domain_param['m_ball']
        r_ball = self.domain_param['r_ball']
        d_plate = 0.01  # only for animation

        # Init render objects on first call
        self._anim['canvas'] = vp.canvas(width=800,
                                         height=800,
                                         title="Quanser Ball-Balancer")
        self._anim['ball'] = vp.sphere(pos=vp.vec(self.state[2], self.state[3],
                                                  r_ball + d_plate / 2.),
                                       radius=r_ball,
                                       mass=m_ball,
                                       color=vp.color.red,
                                       canvas=self._anim['canvas'])
        self._anim['plate'] = vp.box(pos=vp.vec(0, 0, 0),
                                     size=vp.vec(l_plate, l_plate, d_plate),
                                     color=vp.color.green,
                                     canvas=self._anim['canvas'])
        self._anim['null_plate'] = vp.box(
            pos=vp.vec(0, 0, 0),
            size=vp.vec(l_plate * 1.1, l_plate * 1.1, d_plate / 10),
            color=vp.color.cyan,
            opacity=0.5,  # 0 is fully transparent
            canvas=self._anim['canvas'])

    def _update_anim(self):
        import vpython as vp

        g = self.domain_param['g']
        l_plate = self.domain_param['l_plate']
        m_ball = self.domain_param['m_ball']
        r_ball = self.domain_param['r_ball']
        eta_g = self.domain_param['eta_g']
        eta_m = self.domain_param['eta_m']
        K_g = self.domain_param['K_g']
        J_m = self.domain_param['J_m']
        J_l = self.domain_param['J_l']
        r_arm = self.domain_param['r_arm']
        k_m = self.domain_param['k_m']
        R_m = self.domain_param['R_m']
        B_eq = self.domain_param['B_eq']
        c_frict = self.domain_param['c_frict']
        V_thold_x_neg = self.domain_param['V_thold_x_neg']
        V_thold_x_pos = self.domain_param['V_thold_x_pos']
        V_thold_y_neg = self.domain_param['V_thold_y_neg']
        V_thold_y_pos = self.domain_param['V_thold_y_pos']
        offset_th_x = self.domain_param['offset_th_x']
        offset_th_y = self.domain_param['offset_th_y']
        d_plate = 0.01  # only for animation
        #  Compute plate orientation
        a_vp = -self.plate_angs[0]  # plate's angle around the y axis (alpha)
        b_vp = self.plate_angs[1]  # plate's angle around the x axis (beta)

        # Axis runs along the x direction
        self._anim['plate'].size = vp.vec(l_plate, l_plate, d_plate)
        self._anim['plate'].axis = vp.vec(vp.cos(a_vp), 0,
                                          vp.sin(a_vp)) * float(l_plate)
        # Up runs along the y direction (vpython coordinate system)
        self._anim['plate'].up = vp.vec(0, vp.cos(b_vp), vp.sin(b_vp))

        # Get ball position
        x = self.state[2]  # along the x axis
        y = self.state[3]  # along the y axis

        self._anim['ball'].pos = vp.vec(
            x * vp.cos(a_vp), y * vp.cos(b_vp), r_ball + x * vp.sin(a_vp) +
            y * vp.sin(b_vp) + vp.cos(a_vp) * d_plate / 2.)
        self._anim['ball'].radius = r_ball

        # Set caption text
        self._anim['canvas'].caption = f"""
Ejemplo n.º 3
0
class QBallBalancerSim(SimPyEnv, Serializable):
    """
    Environment in which a ball rolls on an actuated plate. The ball is randomly initialized on the plate and is to be
    stabilized on the center of the plate. The problem formulation treats this setup as 2 independent ball-on-beam
    problems. The plate is actuated via 2 servo motors that lift the plate.

    .. note::
        The dynamics are not the same as in the Quanser Workbook (2 DoF Ball-Balancer - Instructor). Here, we added
        the coriolis forces and linear-viscous friction. However, the 2 dim system is still modeled to be decoupled.
        This is the case, since the two rods (connected to the servos) are pushing the plate at the center lines.
        As a result, the angles alpha and beta are w.r.t. to the inertial frame, i.e. they are not 2 sequential rations.
    """

    name: str = "qbb"

    def __init__(
        self,
        dt: float,
        max_steps: int = pyrado.inf,
        task_args: Optional[dict] = None,
        simple_dynamics: bool = False,
        load_experimental_tholds: bool = True,
    ):
        """
        Constructor

        :param dt: simulation step size [s]
        :param max_steps: maximum number of simulation steps
        :param task_args: arguments for the task construction
        :param simple_dynamics: if `True, use a dynamics model without Coriolis forces and without friction effects
        :param load_experimental_tholds: use the voltage thresholds determined from experiments
        """
        Serializable._init(self, locals())

        self._simple_dynamics = simple_dynamics
        self.plate_angs = np.zeros(
            2
        )  # plate's angles alpha and beta [rad] (unused for simple_dynamics = True)

        # Call SimPyEnv's constructor
        super().__init__(dt, max_steps, task_args)

        if not simple_dynamics:
            self._kin = QBallBalancerKin(self)

    def _create_spaces(self):
        l_plate = self.domain_param["plate_length"]

        # Define the spaces
        max_state = np.array([
            np.pi / 4.0,
            np.pi / 4.0,
            l_plate / 2.0,
            l_plate / 2.0,  # [rad, rad, m, m, ...
            5 * np.pi,
            5 * np.pi,
            0.5,
            0.5,
        ])  # ... rad/s, rad/s, m/s, m/s]
        min_init_state = np.array([
            0.75 * l_plate / 2, -np.pi, -0.05 * max_state[6],
            -0.05 * max_state[7]
        ])
        max_init_state = np.array([
            0.8 * l_plate / 2, np.pi, 0.05 * max_state[6], 0.05 * max_state[7]
        ])

        self._state_space = BoxSpace(
            -max_state,
            max_state,
            labels=[
                "theta_x", "theta_y", "x", "y", "theta_x_dot", "theta_y_dot",
                "x_dot", "y_dot"
            ],
        )
        self._obs_space = self._state_space.copy()
        self._init_space = Polar2DPosVelSpace(
            min_init_state,
            max_init_state,
            labels=["r", "phi", "x_dot", "y_dot"])
        self._act_space = BoxSpace(-MAX_ACT_QBB,
                                   MAX_ACT_QBB,
                                   labels=["V_x", "V_y"])

        self._curr_act = np.zeros_like(
            MAX_ACT_QBB)  # just for usage in render function

    def _create_task(self, task_args: dict) -> Task:
        # Define the task including the reward function
        state_des = task_args.get("state_des", np.zeros(8))
        Q = task_args.get(
            "Q", np.diag([1e0, 1e0, 5e3, 5e3, 1e-2, 1e-2, 5e-1, 5e-1]))
        R = task_args.get("R", np.diag([1e-2, 1e-2]))
        # Q = np.diag([1e2, 1e2, 5e2, 5e2, 1e-2, 1e-2, 1e+1, 1e+1])  # for LQR
        # R = np.diag([1e-2, 1e-2])  # for LQR

        return DesStateTask(
            self.spec, state_des,
            ScaledExpQuadrErrRewFcn(Q,
                                    R,
                                    self.state_space,
                                    self.act_space,
                                    min_rew=1e-4))

    # Cache measured thresholds during one run and reduce console log spam that way
    measured_tholds = None

    @classmethod
    def get_voltage_tholds(cls, load_experiments: bool = True) -> dict:
        """If available, the voltage thresholds computed from measurements, else use default values."""
        # Hard-coded default thresholds
        tholds = dict(voltage_thold_x_pos=0.28,
                      voltage_thold_x_neg=-0.10,
                      voltage_thold_y_pos=0.28,
                      voltage_thold_y_neg=-0.074)

        if load_experiments:
            if cls.measured_tholds is None:
                ex_dir = osp.join(pyrado.EVAL_DIR, "volt_thold_qbb")
                if osp.exists(ex_dir) and osp.isdir(ex_dir) and os.listdir(
                        ex_dir):
                    print_cbt_once(
                        "Found measured thresholds, using the averages.", "g")
                    # Calculate cumulative running average
                    cma = np.zeros((2, 2))
                    i = 0.0
                    for f in filter(lambda f: f.endswith(".npy"),
                                    os.listdir(".npy")):
                        i += 1.0
                        cma = cma + (np.load(osp.join(ex_dir, f)) - cma) / i
                    tholds["voltage_thold_x_pos"] = cma[0, 1]
                    tholds["voltage_thold_x_neg"] = cma[0, 0]
                    tholds["voltage_thold_y_pos"] = cma[1, 1]
                    tholds["voltage_thold_y_neg"] = cma[1, 0]
                else:
                    print_cbt_once(
                        "No measured thresholds found, falling back to default values.",
                        "y")

                # Cache results for future calls
                cls.measured_tholds = tholds
            else:
                tholds = cls.measured_tholds

        return tholds

    @classmethod
    def get_nominal_domain_param(cls) -> dict:
        voltage_tholds = cls.get_voltage_tholds()
        return dict(
            gravity_const=9.81,  # gravity constant [m/s**2]
            ball_mass=0.003,  # mass of the ball [kg]
            ball_radius=0.019625,  # radius of the ball [m]
            plate_length=0.275,  # length of the (square) plate [m]
            arm_radius=
            0.0254,  # distance between the servo output gear shaft and the coupled joint [m]
            gear_ratio=70.0,  # gear ratio [-]
            gear_efficiency=0.9,  # gearbox efficiency [-]
            load_inertia=5.2822e-5,  # load moment of inertia [kg*m**2]
            motor_inertia=4.6063e-7,  # motor moment of inertia [kg*m**2]
            motor_back_emf=
            0.0077,  # motor torque constant [N*m/A] = back-EMF constant [V*s/rad]
            motor_resistance=2.6,  # motor armature resistance
            motor_efficiency=0.69,  # motor efficiency [-]
            combined_damping=
            0.015,  # equivalent viscous damping coefficient w.r.t. load [N*m*s/rad]
            ball_damping=
            0.05,  # viscous damping coefficient for the ball velocity [N*s/m]
            voltage_thold_x_pos=voltage_tholds[
                "voltage_thold_x_pos"],  # min. voltage required to move the x servo in pos. dir. [V]
            voltage_thold_x_neg=voltage_tholds[
                "voltage_thold_x_neg"],  # min. voltage required to move the x servo in neg. dir. [V]
            voltage_thold_y_pos=voltage_tholds[
                "voltage_thold_y_pos"],  # min. voltage required to move the y servo in pos. dir. [V]
            voltage_thold_y_neg=voltage_tholds[
                "voltage_thold_y_neg"],  # min. voltage required to move the y servo in neg. dir. [V]
            offset_th_x=0.0,  # angular offset of the x axis motor shaft [rad]
            offset_th_y=0.0,  # angular offset of the y axis motor shaft [rad]
        )

    def _calc_constants(self):
        l_plate = self.domain_param["plate_length"]
        m_ball = self.domain_param["ball_mass"]
        r_ball = self.domain_param["ball_radius"]
        eta_g = self.domain_param["gear_efficiency"]
        eta_m = self.domain_param["motor_efficiency"]
        K_g = self.domain_param["gear_ratio"]
        J_m = self.domain_param["motor_inertia"]
        J_l = self.domain_param["load_inertia"]
        r_arm = self.domain_param["arm_radius"]
        k_m = self.domain_param["motor_back_emf"]
        R_m = self.domain_param["motor_resistance"]
        B_eq = self.domain_param["combined_damping"]

        self.J_ball = 2.0 / 5 * m_ball * r_ball**2  # inertia of the ball [kg*m**2]
        self.J_eq = eta_g * K_g**2 * J_m + J_l  # equivalent moment of inertia [kg*m**2]
        self.c_kin = 2.0 * r_arm / l_plate  # coefficient for the rod-plate kinematic
        self.A_m = eta_g * K_g * eta_m * k_m / R_m
        self.B_eq_v = eta_g * K_g**2 * eta_m * k_m**2 / R_m + B_eq
        self.zeta = m_ball * r_ball**2 + self.J_ball  # combined moment of inertial for the ball

    def _state_from_init(self, init_state):
        state = np.zeros(8)
        state[2:4] = init_state[:2]
        state[6:8] = init_state[2:]
        return state

    def reset(self,
              init_state: np.ndarray = None,
              domain_param: dict = None) -> np.ndarray:
        obs = super().reset(init_state=init_state, domain_param=domain_param)

        # Reset the plate angles
        if self._simple_dynamics:
            self.plate_angs = np.zeros(
                2)  # actually not necessary since not used
        else:
            offset_th_x = self.domain_param["offset_th_x"]
            offset_th_y = self.domain_param["offset_th_y"]
            # Get the plate angles from inverse kinematics for initial pose
            self.plate_angs[0] = self._kin(self.state[0] + offset_th_x)
            self.plate_angs[1] = self._kin(self.state[1] + offset_th_y)

        # Return perfect observation
        return obs

    def _step_dynamics(self, act: np.ndarray):
        gravity_const = self.domain_param["gravity_const"]
        m_ball = self.domain_param["ball_mass"]
        r_ball = self.domain_param["ball_radius"]
        ball_damping = self.domain_param["ball_damping"]
        V_thold_x_neg = self.domain_param["voltage_thold_x_neg"]
        V_thold_x_pos = self.domain_param["voltage_thold_x_pos"]
        V_thold_y_neg = self.domain_param["voltage_thold_y_neg"]
        V_thold_y_pos = self.domain_param["voltage_thold_y_pos"]
        offset_th_x = self.domain_param["offset_th_x"]
        offset_th_y = self.domain_param["offset_th_y"]

        # Apply a voltage dead zone, i.e. below a certain amplitude the system is will not move. This is a very
        # simple model of static friction. Experimentally evaluated the voltage required to get the plate moving.
        if not self._simple_dynamics and V_thold_x_neg <= act[
                0] <= V_thold_x_pos:
            act[0] = 0
        if not self._simple_dynamics and V_thold_y_neg <= act[
                1] <= V_thold_y_pos:
            act[1] = 0

        # State
        th_x = self.state[0] + offset_th_x  # angle of the x axis servo (load)
        th_y = self.state[1] + offset_th_y  # angle of the y axis servo (load)
        x = self.state[2]  # ball position along the x axis
        y = self.state[3]  # ball position along the y axis
        th_x_dot = self.state[4]  # angular velocity of the x axis servo (load)
        th_y_dot = self.state[5]  # angular velocity of the y axis servo (load)
        x_dot = self.state[6]  # ball velocity along the x axis
        y_dot = self.state[7]  # ball velocity along the y axis

        th_x_ddot = (self.A_m * act[0] - self.B_eq_v * th_x_dot) / self.J_eq
        th_y_ddot = (self.A_m * act[1] - self.B_eq_v * th_y_dot) / self.J_eq
        """
        THIS IS TIME INTENSIVE
        if not self._simple_dynamics:
            # Get the plate angles from inverse kinematics
            self.plate_angs[0] = self._kin(self.state[0] + self.offset_th_x)
            self.plate_angs[1] = self._kin(self.state[1] + self.offset_th_y)
        """

        # Plate (not part of the state since it is a redundant information)
        # The definition of th_y is opposing beta, i.e.
        a = self.plate_angs[0]  # plate's angle around the y axis (alpha)
        b = self.plate_angs[1]  # plate's angle around the x axis (beta)
        a_dot = self.c_kin * th_x_dot * np.cos(th_x) / np.cos(
            a)  # plate's angular velocity around the y axis (alpha)
        b_dot = self.c_kin * -th_y_dot * np.cos(-th_y) / np.cos(
            b)  # plate's angular velocity around the x axis (beta)
        # Plate's angular accelerations (unused for simple_dynamics = True)
        a_ddot = (1.0 / np.cos(a) *
                  (self.c_kin *
                   (th_x_ddot * np.cos(th_x) - th_x_dot**2 * np.sin(th_x)) +
                   a_dot**2 * np.sin(a)))
        b_ddot = (1.0 / np.cos(b) *
                  (self.c_kin *
                   (-th_y_ddot * np.cos(th_y) -
                    (-th_y_dot)**2 * np.sin(-th_y)) + b_dot**2 * np.sin(b)))

        # kinematics: sin(a) = self.c_kin * sin(th_x)
        if self._simple_dynamics:
            # Ball dynamic without friction and Coriolis forces
            x_ddot = self.c_kin * m_ball * gravity_const * r_ball**2 * np.sin(
                th_x) / self.zeta  # symm inertia
            y_ddot = self.c_kin * m_ball * gravity_const * r_ball**2 * np.sin(
                th_y) / self.zeta  # symm inertia
        else:
            # Ball dynamic with friction and Coriolis forces
            x_ddot = (
                -ball_damping * x_dot * r_ball**2  # friction
                - self.J_ball * r_ball * a_ddot  # plate influence
                + m_ball * x * a_dot**2 * r_ball**2  # centripetal
                + self.c_kin * m_ball * gravity_const * r_ball**2 *
                np.sin(th_x)  # gravity
            ) / self.zeta
            y_ddot = (
                -ball_damping * y_dot * r_ball**2  # friction
                - self.J_ball * r_ball * b_ddot  # plate influence
                + m_ball * y * (-b_dot)**2 * r_ball**2  # centripetal
                + self.c_kin * m_ball * gravity_const * r_ball**2 *
                np.sin(th_y)  # gravity
            ) / self.zeta

        # Integration step (symplectic Euler)
        self.state[4:] += np.array([th_x_ddot, th_y_ddot, x_ddot, y_ddot
                                    ]) * self._dt  # next velocity
        self.state[:4] += self.state[4:] * self._dt  # next position

        # Integration step (forward Euler)
        self.plate_angs += np.array([
            a_dot, b_dot
        ]) * self._dt  # just for debugging when simplified dynamics

    def _init_anim(self):
        # Import PandaVis Class
        from pyrado.environments.pysim.pandavis import QBallBalancerVis

        # Create instance of PandaVis
        self._visualization = QBallBalancerVis(self, self._rendering)