def test_real_env_constructors(dt, max_steps):
    qbbr = QBallBalancerReal(dt=dt, max_steps=max_steps)
    assert qbbr is not None
    qcp_st = QCartPoleStabReal(dt=dt, max_steps=max_steps)
    assert qcp_st is not None
    qcp_su = QCartPoleSwingUpReal(dt=dt, max_steps=max_steps)
    assert qcp_su is not None
    qqr = QQubeReal(dt=dt, max_steps=max_steps)
    assert qqr is not None
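# Hedged sketch (assumption, not from the original test file): in pytest, the dt and
# max_steps arguments would typically be supplied via parametrization; the values and the
# wrapper test below are purely illustrative.
import pytest

@pytest.mark.parametrize('dt, max_steps', [(1 / 500.0, 500)])
def test_real_env_constructors_param(dt, max_steps):
    test_real_env_constructors(dt, max_steps)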
if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment() if args.ex_dir is None else args.ex_dir
    ex_tag = ex_dir.split('--', 1)[1]

    # Load the policy and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir, args)
    if args.verbose:
        print(f'Policy params:\n{policy.param_values.detach().cpu().numpy()}')

    # Create the real-world counterpart (without domain randomization)
    env_real = QBallBalancerReal(args.dt)
    print_cbt('Set up the QBallBalancerReal environment.', 'c')

    # Set up the PD-controller
    pdctrl = QBallBalancerPDCtrl(env_real.spec)

    if args.random_init_state:
        # Sample a random initial state in polar coordinates (radius, angle)
        min_init_state = np.array([0.7 * 0.275 / 2, 0])
        max_init_state = np.array([0.8 * 0.275 / 2, 2 * np.pi])
        init_space = Polar2DPosSpace(min_init_state, max_init_state)
        init_state = init_space.sample_uniform()
    else:
        # The center of the plate is the initial state
        init_state = np.zeros(2)
        # init_state = np.array([1 / np.sqrt(2), 1 / np.sqrt(2)])
        # init_state = np.array([1., 0.])
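    # Hedged note (assumption about the constants' meaning): 0.275 m appears to be the plate's
    # edge length, so the sampled radius covers 70% to 80% of the distance from the plate's
    # center to its edge; plate_half_length is a hypothetical name used only for illustration.
    plate_half_length = 0.275 / 2
    assert np.isclose(0.7 * plate_half_length, 0.09625)
    assert np.isclose(0.8 * plate_half_length, 0.11)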
from pyrado.utils.input_output import print_cbt
from pyrado.utils.argparser import get_argparser


if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()

    # Load the policy (trained in simulation) and the environment (for constructing the real-world counterpart)
    env_sim, policy, _ = load_experiment(ex_dir)

    # Detect the correct real-world counterpart and create it
    if isinstance(inner_env(env_sim), QBallBalancerSim):
        env_real = QBallBalancerReal(dt=args.dt, max_steps=args.max_steps)
    elif isinstance(inner_env(env_sim), QCartPoleSim):
        env_real = QCartPoleReal(dt=args.dt, max_steps=args.max_steps)
    elif isinstance(inner_env(env_sim), QQubeSim):
        env_real = QQubeReal(dt=args.dt, max_steps=args.max_steps)
    else:
        raise pyrado.TypeErr(given=env_sim, expected_type=[QBallBalancerSim, QCartPoleSim, QQubeSim])
    print_cbt(f'Set up env {env_real.name}.', 'c')

    # Finally, wrap the env in the same way as it was wrapped during training
    env_real = wrap_like_other_env(env_real, env_sim)

    # Run on the device
    done = False
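    # Hedged sketch (assumption, not part of the original excerpt): the evaluation loop would
    # typically use Pyrado's rollout helper and then ask whether to repeat; the exact keyword
    # arguments of rollout are assumptions based on similar Pyrado deployment scripts.
    from pyrado.sampling.rollout import rollout

    while not done:
        ro = rollout(env_real, policy, eval=True)
        print_cbt(f'Undiscounted return: {ro.undiscounted_return()}', 'g')
        done = input('Run another rollout? [y / any other key to quit] ').lower() != 'y'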
                    default=2e-5, help='Step size for the action')
parser.add_argument('--thold_theta_deg', type=float, default=0.1,
                    help='Threshold for the servo shafts to rotate, in degrees')
parser.add_argument('--thold_x', type=float, default=5e-4,
                    help='Threshold for the cart to move, in meters')
args = parser.parse_args()

if args.env_name == QBallBalancerReal.name:
    # Create the interface for communicating with the device
    env = QBallBalancerReal(args.dt, args.max_steps)
    print_cbt('Set up the QBallBalancerReal environment.', 'c')

    # Initialize
    thold_theta = args.thold_theta_deg / 180. * np.pi
    tholds_V = np.zeros((args.num_runs, 2, 2))  # num_runs matrices in format [[-x, +x], [-y, +y]]
    env.reset()  # sends a zero action until something else is commanded
    input('Servo voltages set to [0, 0]. Prepare and then press enter to start.')

    # Run
    for r in range(args.num_runs):
    Returns:
        The bounded variable.
    """
    return np.maximum(min_value, np.minimum(x, max_value))


# Usage example: python ... --env_name qcp-st
if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Set up the PD-controller for the selected environment
    if args.env_name == QBallBalancerReal.name:
        env = QBallBalancerReal(args.dt, args.max_steps)
        policy = QBallBalancerPDCtrl(env.spec,
                                     kp=to.diag(to.tensor([3.45, 3.45])),
                                     kd=to.diag(to.tensor([2.11, 2.11])))
        print_cbt('Set up controller for the QBallBalancerReal environment.', 'c')

    elif args.env_name == QCartPoleStabReal.name:
        env = QCartPoleStabReal(args.dt, args.max_steps)
        policy = CartpoleStabilizerPolicy(
            env.spec,
            K=np.array([1.2278416e+00, 4.5279346e+00, -1.2385756e-02, 6.0038762e+00, -4.1818547e+00]))
        # policy = QCartPoleSwingUpAndBalanceCtrl(env.spec)
def default_qqbb_real():
    return QBallBalancerReal(dt=1 / 500.0, max_steps=500)
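# Hedged usage sketch (assumption, not part of the original suite): if default_qqbb_real is
# registered as a pytest fixture (e.g. decorated with @pytest.fixture in a conftest.py), a
# test can request it by name; the test below is purely illustrative.
def test_qbb_real_default_fixture(default_qqbb_real):
    assert default_qqbb_real.spec is not None
    assert default_qqbb_real.max_steps == 500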