Beispiel #1
0
def test_real_env_contructors(dt, max_steps):
    """Check that every real-device environment class can be instantiated.

    Each class is constructed with the given ``dt`` and ``max_steps`` and the
    resulting object is asserted to be not ``None``.
    """
    env_classes = (
        QBallBalancerReal,
        QCartPoleStabReal,
        QCartPoleSwingUpReal,
        QQubeReal,
    )
    # Hold on to every instance until the test returns, so no environment is
    # released while the remaining ones are still being constructed.
    created = []
    for env_cls in env_classes:
        env = env_cls(dt=dt, max_steps=max_steps)
        assert env is not None
        created.append(env)
Beispiel #2
0
if __name__ == '__main__':
    # Parse command line arguments (env_name, dt, and max_steps are read below)
    args = get_argparser().parse_args()

    # Pick the real-world environment and a matching controller by the environment's name.
    # Every branch uses an exact comparison: with `in`, a string-valued name would also
    # match any substring of it (including the empty string) and select the wrong setup.
    # NOTE(review): no fallback branch is visible in this excerpt; an unknown env_name
    # leaves `env` and `policy` undefined.
    if args.env_name == QBallBalancerReal.name:
        env = QBallBalancerReal(args.dt, args.max_steps)
        # PD controller with diagonal gain matrices
        policy = QBallBalancerPDCtrl(env.spec,
                                     kp=to.diag(to.tensor([3.45, 3.45])),
                                     kd=to.diag(to.tensor([2.11, 2.11])))
        print_cbt('Set up controller for the QBallBalancerReal environment.',
                  'c')

    elif args.env_name == QCartPoleStabReal.name:
        env = QCartPoleStabReal(args.dt, args.max_steps)
        # Fixed gain vector K handed to the stabilizing policy
        policy = CartpoleStabilizerPolicy(env.spec,
                                          K=np.array([
                                              1.2278416e+00, 4.5279346e+00,
                                              -1.2385756e-02, 6.0038762e+00,
                                              -4.1818547e+00
                                          ]))
        # Disabled alternative: policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
        print_cbt('Set up controller for the QCartPoleStabReal environment.',
                  'c')

    elif args.env_name == QCartPoleSwingUpReal.name:
        env = QCartPoleSwingUpReal(args.dt, args.max_steps)
        policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
        print_cbt(
            'Set up controller for the QCartPoleSwingUpReal environment.', 'c')
Beispiel #3
0
if __name__ == '__main__':
    # Command line arguments supply dt, max_steps, and the verbose flag
    args = get_argparser().parse_args()

    # Ask for the experiment directory; the tag is everything after the first '--'
    ex_dir = ask_for_experiment()
    ex_tag = ex_dir.split('--', 1)[1]

    # Load the simulated environment and the policy (the third return value is ignored)
    env_sim, policy, _ = load_experiment(ex_dir)

    if args.verbose:
        print(f'Policy params:\n{policy.param_values.detach().numpy()}')

    # Create the real-world counterpart (without domain randomization)
    env_real = QCartPoleStabReal(dt=args.dt, max_steps=args.max_steps)
    print_cbt('Set up the QCartPoleStabReal environment.', 'c')

    # Set up one time-based disturber per direction, both running at the environment's dt
    steps_disturb = 10
    disturber_pos = TimePolicy(env_real.spec, volt_disturbance_pos, env_real.dt)
    disturber_neg = TimePolicy(env_real.spec, volt_disturbance_neg, env_real.dt)
    print_cbt(
        'Set up the disturbers for the QCartPoleStabReal environment.'
        f'\nVolt disturbance: {6} volts for {steps_disturb} steps',
        'c',
    )

    # Center the cart, reset the velocity filters, and wait until the user or the
    # controller has put the pole upright
    env_real.reset()
    print_cbt('Ready', 'g')
Beispiel #4
0
 def default_qcpst_real():
     """Return a QCartPoleStabReal built with dt = 1/500 and max_steps = 500."""
     step_size = 1 / 500.0
     return QCartPoleStabReal(dt=step_size, max_steps=500)