Code Example #1
import pytest

# Imports assume Pyrado's module layout (paths may differ between versions)
from pyrado.environments.quanser.quanser_ball_balancer import QBallBalancerReal
from pyrado.environments.quanser.quanser_cartpole import QCartPoleStabReal, QCartPoleSwingUpReal
from pyrado.environments.quanser.quanser_qube import QQubeReal


@pytest.mark.parametrize('dt, max_steps', [(1 / 500., 500)])  # example values, not from the original suite
def test_real_env_constructors(dt, max_steps):
    # Construct every real-world Quanser environment and check that creation succeeds
    qbbr = QBallBalancerReal(dt=dt, max_steps=max_steps)
    assert qbbr is not None
    qcp_st = QCartPoleStabReal(dt=dt, max_steps=max_steps)
    assert qcp_st is not None
    qcp_su = QCartPoleSwingUpReal(dt=dt, max_steps=max_steps)
    assert qcp_su is not None
    qqr = QQubeReal(dt=dt, max_steps=max_steps)
    assert qqr is not None
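
For orientation, here is a minimal sketch of how one of these environments is driven once constructed, assuming Pyrado's standard Env interface (reset/step/close); this snippet is illustrative and not part of the test:

import numpy as np
from pyrado.environments.quanser.quanser_ball_balancer import QBallBalancerReal

env = QBallBalancerReal(dt=1 / 500., max_steps=500)
obs = env.reset()
for _ in range(10):
    # Send a zero action; on the real device this holds the servos still
    obs, rew, done, info = env.step(np.zeros(env.act_space.shape))
env.close()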
Code Example #2
import numpy as np

# Import paths follow Pyrado's module layout (hedged: they may differ between versions)
from pyrado.environments.quanser.quanser_ball_balancer import QBallBalancerReal
from pyrado.logger.experiment import ask_for_experiment
from pyrado.policies.environment_specific import QBallBalancerPDCtrl
from pyrado.spaces.polar import Polar2DPosSpace
from pyrado.utils.argparser import get_argparser
from pyrado.utils.experiments import load_experiment
from pyrado.utils.input_output import print_cbt

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir
    ex_tag = ex_dir.split('--', 1)[1]

    # Load the policy and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir, args)

    if args.verbose:
        print(f'Policy params:\n{policy.param_values.detach().cpu().numpy()}')

    # Create real-world counterpart (without domain randomization)
    env_real = QBallBalancerReal(args.dt)
    print_cbt('Set up the QBallBalancerReal environment.', 'c')

    # Set up PD-controller
    pdctrl = QBallBalancerPDCtrl(env_real.spec)
    if args.random_init_state:
        # Random initial state
        min_init_state = np.array([0.7 * 0.275 / 2, 0])
        max_init_state = np.array([0.8 * 0.275 / 2, 2 * np.pi])
        init_space = Polar2DPosSpace(min_init_state, max_init_state)
        init_state = init_space.sample_uniform()
    else:
        # Center of the plate is the initial state; alternative init states left commented out
        init_state = np.zeros(2)
        # init_state = np.array([1/np.sqrt(2), 1/np.sqrt(2)])
        # init_state = np.array([1., 0.])
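
For illustration, the polar sampling above can be mimicked directly with NumPy, assuming Polar2DPosSpace draws a radius and an angle uniformly from the given bounds and returns the Cartesian position (a sketch, not Pyrado's implementation):

import numpy as np

def sample_polar_pos(min_state, max_state):
    # min_state/max_state are [radius, angle] bounds, as in the script above
    r = np.random.uniform(min_state[0], max_state[0])
    phi = np.random.uniform(min_state[1], max_state[1])
    return np.array([r * np.cos(phi), r * np.sin(phi)])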
Code Example #3
import pyrado
# Import locations assumed from Pyrado's layout; they may differ between versions
from pyrado.environment_wrappers.utils import inner_env
from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim
from pyrado.environments.pysim.quanser_cartpole import QCartPoleSim
from pyrado.environments.pysim.quanser_qube import QQubeSim
from pyrado.environments.quanser.quanser_ball_balancer import QBallBalancerReal
from pyrado.environments.quanser.quanser_cartpole import QCartPoleReal
from pyrado.environments.quanser.quanser_qube import QQubeReal
from pyrado.logger.experiment import ask_for_experiment
from pyrado.utils.experiments import load_experiment, wrap_like_other_env
from pyrado.utils.input_output import print_cbt
from pyrado.utils.argparser import get_argparser

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Detect the correct real-world counterpart and create it
    if isinstance(inner_env(env_sim), QBallBalancerSim):
        env_real = QBallBalancerReal(dt=args.dt, max_steps=args.max_steps)
    elif isinstance(inner_env(env_sim), QCartPoleSim):
        env_real = QCartPoleReal(dt=args.dt, max_steps=args.max_steps)
    elif isinstance(inner_env(env_sim), QQubeSim):
        env_real = QQubeReal(dt=args.dt, max_steps=args.max_steps)
    else:
        raise pyrado.TypeErr(
            given=env_sim,
            expected_type=[QBallBalancerSim, QCartPoleSim, QQubeSim])
    print_cbt(f'Set up env {env_real.name}.', 'c')

    # Finally, wrap the environment in the same way as was done during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run on device
    done = False
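
The listing stops right after done = False. In deployment scripts of this kind, the flag typically guards a rollout-and-ask loop; a rough sketch, assuming rollout and after_rollout_query from pyrado.sampling.rollout and their usual 3-tuple return:

from pyrado.sampling.rollout import rollout, after_rollout_query

while not done:
    # Execute one episode on the device, then ask the user whether to continue
    ro = rollout(env_real, policy, eval=True)
    done, _, _ = after_rollout_query(env_real, policy, ro)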
Code Example #4
                        default=2e-5,
                        help='Step size for the action')
    parser.add_argument(
        '--thold_theta_deg',
        type=float,
        default=0.1,
        help='Threshold for the servo shafts to rotate in degree')
    parser.add_argument('--thold_x',
                        type=float,
                        default=5e-4,
                        help='Threshold for the cart to move in meter')
    args = parser.parse_args()

    if args.env_name == QBallBalancerReal.name:
        # Create the interface for communicating with the device
        env = QBallBalancerReal(args.dt, args.max_steps)
        print_cbt('Set up the QBallBalancerReal environment.', 'c')

        # Initialize
        thold_theta = args.thold_theta_deg / 180. * np.pi
        tholds_V = np.zeros((args.num_runs, 2, 2))  # num_runs matrices in format [[-x, +x], [-y, +y]]

        env.reset()  # sends a zero action until something else is commanded
        input('Servo voltages set to [0, 0]. Prepare and then press enter to start.')

        # Run
        for r in range(args.num_runs):
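
The run loop is cut off here. Judging from the arguments above (a voltage step size plus rotation and movement thresholds), each run apparently ramps the commanded voltage until motion is detected. A hypothetical sketch of that idea follows; the helper name, the observation index, and the exact stopping rule are assumptions, not the script's actual body:

import numpy as np

def find_actuation_threshold(env, axis, v_step, thold, obs_idx):
    # Increase the voltage on one axis in steps of v_step until the selected
    # observation entry has moved by more than thold from its resting value
    act = np.zeros(env.act_space.shape)
    obs = env.reset()
    ref = obs[obs_idx]
    while abs(obs[obs_idx] - ref) < thold:
        act[axis] += v_step
        obs, _, _, _ = env.step(act)
    return act[axis]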
Code Example #5
import numpy as np
import torch as to
# (further imports, e.g. the environment and policy classes, are clipped in this listing)


def bound(x, min_value, max_value):  # signature inferred from the body; the listing is clipped above
    """Clip the input element-wise to the given interval.

    Returns:
        The bounded variable.
    """
    return np.maximum(min_value, np.minimum(x, max_value))


# python ... -env-name qcp-st

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Set up PD-controller
    if args.env_name == QBallBalancerReal.name:
        env = QBallBalancerReal(args.dt, args.max_steps)
        policy = QBallBalancerPDCtrl(env.spec,
                                     kp=to.diag(to.tensor([3.45, 3.45])),
                                     kd=to.diag(to.tensor([2.11, 2.11])))
        print_cbt('Set up controller for the QBallBalancerReal environment.', 'c')

    elif args.env_name == QCartPoleStabReal.name:
        env = QCartPoleStabReal(args.dt, args.max_steps)
        policy = CartpoleStabilizerPolicy(
            env.spec,
            K=np.array([1.2278416e+00, 4.5279346e+00, -1.2385756e-02, 6.0038762e+00, -4.1818547e+00]))
        # policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
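
For context, the diagonal kp/kd tensors above parameterize a standard PD law. A minimal standalone sketch of that computation (not Pyrado's actual QBallBalancerPDCtrl implementation):

import torch as to

def pd_action(err, err_dot, kp, kd):
    # u = Kp @ e + Kd @ e_dot, with diagonal gain matrices
    return kp @ err + kd @ err_dot

kp = to.diag(to.tensor([3.45, 3.45]))
kd = to.diag(to.tensor([2.11, 2.11]))
u = pd_action(to.tensor([0.01, -0.02]), to.zeros(2), kp, kd)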
Code Example #6
File: conftest.py  Project: fdamken/SimuRLacra
def default_qqbb_real():
    return QBallBalancerReal(dt=1 / 500.0, max_steps=500)
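
Since this function is defined in conftest.py, it is presumably registered as a pytest fixture; a hypothetical sketch of the decorator and a test consuming it (both are assumptions, not part of the project):

import pytest
from pyrado.environments.quanser.quanser_ball_balancer import QBallBalancerReal

@pytest.fixture
def default_qqbb_real():
    return QBallBalancerReal(dt=1 / 500.0, max_steps=500)

def test_default_qqbb_real(default_qqbb_real):
    # The fixture should hand over a fully constructed environment
    assert default_qqbb_real.max_steps == 500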