def __init__(self,
                 env_spec: EnvSpec,
                 ref_energy: float,
                 energy_gain: float,
                 th_gain: float,
                 acc_max: float,
                 use_cuda: bool = False):
        """
        Constructor

        :param env_spec: environment specification
        :param ref_energy: reference energy level [J]
        :param energy_gain: P-gain on the energy [m/s/J]
        :param th_gain: P-gain on angle theta
        :param acc_max: maximum linear acceleration of the pendulum pivot [m/s**2]
        :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
        """
        super().__init__(env_spec, use_cuda)

        # Initialize parameters
        self._log_E_ref = nn.Parameter(to.log(to.tensor(ref_energy)), requires_grad=True)
        self._log_E_gain = nn.Parameter(to.log(to.tensor(energy_gain)), requires_grad=True)
        self._th_gain = nn.Parameter(to.tensor(th_gain), requires_grad=True)
        self.acc_max = to.tensor(acc_max)
        self.dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
Exemple #2
0
    def __init__(
        self,
        env_spec: EnvSpec,
        ref_energy: float,
        energy_gain: float,
        th_gain: float,
        acc_max: float,
        reset_domain_param: bool = True,
        use_cuda: bool = False,
    ):
        """
        Constructor

        :param env_spec: environment specification
        :param ref_energy: reference energy level [J]
        :param energy_gain: P-gain on the energy [m/s/J]
        :param th_gain: P-gain on angle theta
        :param acc_max: maximum linear acceleration of the pendulum pivot [m/s**2]
        :param reset_domain_param: if `True` the domain parameters are reset if the they are present as a entry in the
                                   kwargs passed to `reset()`. If `False` they are ignored.
        :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
        """
        super().__init__(env_spec, use_cuda)

        # Initial parameters
        self._log_E_ref_init = to.log(to.tensor(ref_energy))
        self._log_E_gain_init = to.log(to.tensor(energy_gain))
        self._th_gain_init = to.tensor(th_gain)

        # Define parameters
        self._log_E_ref = nn.Parameter(to.empty_like(self._log_E_ref_init),
                                       requires_grad=True)
        self._log_E_gain = nn.Parameter(to.empty_like(self._log_E_gain_init),
                                        requires_grad=True)
        self._th_gain = nn.Parameter(to.empty_like(self._th_gain_init),
                                     requires_grad=True)

        self.acc_max = to.tensor(acc_max)
        self._domain_param = QQubeSwingUpSim.get_nominal_domain_param()
        self._reset_domain_param = reset_domain_param

        # Default initialization
        self.init_param(None)
        max_iter=200,
        eps_clip=0.12648736789309026,
        min_steps=30 * env_sim.max_steps,
        num_epoch=7,
        batch_size=500,
        std_init=0.7573286998997557,
        lr=6.999956625305722e-04,
        max_grad_norm=1.0,
        num_workers=8,
        lr_scheduler=lr_scheduler.ExponentialLR,
        lr_scheduler_hparam=dict(gamma=0.999),
    )
    subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([
            0.8 * dp_nom["mass_pend_pole"],
            dp_nom["mass_pend_pole"] / 5000,
            0.8 * dp_nom["mass_rot_pole"],
            dp_nom["mass_rot_pole"] / 5000,
            0.8 * dp_nom["length_pend_pole"],
            dp_nom["length_pend_pole"] / 5000,
            0.8 * dp_nom["length_rot_pole"],
            dp_nom["length_rot_pole"] / 5000,
        ]),
        bound_up=np.array([
            1.2 * dp_nom["mass_pend_pole"],
            dp_nom["mass_pend_pole"] / 20,
            1.2 * dp_nom["mass_rot_pole"],
    ex_dir = setup_experiment(
        QQubeSwingUpSim.name, f"{NPDR.name}_{QQubeSwingUpAndBalanceCtrl.name}",
        "sim2sim")

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Environments
    env_sim_hparams = dict(dt=1 / 250.0, max_steps=1500)
    env_sim = QQubeSwingUpSim(**env_sim_hparams)
    env_sim = ActDelayWrapper(env_sim)

    # Create a fake ground truth target domain
    num_real_rollouts = 2
    env_real = deepcopy(env_sim)
    dp_nom = env_sim.get_nominal_domain_param()
    env_real.domain_param = dict(
        damping_rot_pole=dp_nom["damping_rot_pole"] * 1.9,
        damping_pend_pole=dp_nom["damping_pend_pole"] * 0.4,
        motor_resistance=dp_nom["motor_resistance"] * 1.0,
        motor_back_emf=dp_nom["motor_back_emf"] * 1.0,
        mass_pend_pole=dp_nom["mass_pend_pole"] * 1.1,
        mass_rot_pole=dp_nom["mass_rot_pole"] * 1.2,
        length_pend_pole=dp_nom["length_pend_pole"] * 0.8,
        length_rot_pole=dp_nom["length_rot_pole"] * 0.9,
        gravity_const=dp_nom["gravity_const"] * 1.0,
    )
    # randomizer = DomainRandomizer(
    #     NormalDomainParam(name="damping_rot_pole", mean=dp_nom["damping_rot_pole"] * 2.0, std=dp_nom["motor_back_emf"] / 10, clip_lo=0.0),
    #     NormalDomainParam(name="damping_pend_pole", mean=dp_nom["damping_pend_pole"] * 2.0, std=dp_nom["motor_back_emf"] / 10, clip_lo=0.0),
    #     NormalDomainParam(name="motor_resistance", mean=dp_nom["motor_resistance"] * 1.1, std=dp_nom["motor_back_emf"] / 50, clip_lo=0.0),