Example #1
def get_uniform_masses_lengths_randomizer_qq(frac_halfspan: float):
    """
    Get a uniform randomizer that applies to all masses and lengths of the Quanser Qube, with half-spans given as a
    fraction of the nominal parameter values.

    :param frac_halfspan: divisor for the half-spans, i.e. each half-span equals the nominal parameter value divided by this factor
    :return: `DomainRandomizer` with uniformly distributed masses and lengths
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim
    dp_nom = QQubeSim.get_nominal_domain_param()
    return DomainRandomizer(
        UniformDomainParam(name='Mp',
                           mean=dp_nom['Mp'],
                           halfspan=dp_nom['Mp'] / frac_halfspan,
                           clip_lo=1e-3),
        UniformDomainParam(name='Mr',
                           mean=dp_nom['Mr'],
                           halfspan=dp_nom['Mr'] / frac_halfspan,
                           clip_lo=1e-3),
        UniformDomainParam(name='Lr',
                           mean=dp_nom['Lr'],
                           halfspan=dp_nom['Lr'] / frac_halfspan,
                           clip_lo=1e-2),
        UniformDomainParam(name='Lp',
                           mean=dp_nom['Lp'],
                           halfspan=dp_nom['Lp'] / frac_halfspan,
                           clip_lo=1e-2),
    )
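For context, here is a minimal usage sketch of the randomizer above. It assumes that `QQubeSim` and `DomainRandWrapperLive` are imported as in the later snippets, and that the wrapper draws new domain parameters on reset (as suggested by its use below); it is an illustration, not part of the original snippet.

# Randomize all masses and lengths within +/- 10% of their nominal values (illustrative sketch)
env = QQubeSim(dt=1 / 500., max_steps=4000)
randomizer = get_uniform_masses_lengths_randomizer_qq(frac_halfspan=10.)
env = DomainRandWrapperLive(env, randomizer)  # new domain parameters are sampled when the environment is reset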
Example #2
def create_default_randomizer_qq() -> DomainRandomizer:
    """
    Create the default randomizer for the `QQubeSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim

    dp_nom = QQubeSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name="gravity_const",
                          mean=dp_nom["gravity_const"],
                          std=dp_nom["gravity_const"] / 10,
                          clip_lo=1e-3),
        NormalDomainParam(name="motor_resistance",
                          mean=dp_nom["motor_resistance"],
                          std=dp_nom["motor_resistance"] / 5,
                          clip_lo=1e-3),
        NormalDomainParam(name="motor_back_emf",
                          mean=dp_nom["motor_back_emf"],
                          std=dp_nom["motor_back_emf"] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name="mass_rot_pole",
                          mean=dp_nom["mass_rot_pole"],
                          std=dp_nom["mass_rot_pole"] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name="length_rot_pole",
                          mean=dp_nom["length_rot_pole"],
                          std=dp_nom["length_rot_pole"] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name="damping_rot_pole",
                          mean=dp_nom["damping_rot_pole"],
                          std=dp_nom["damping_rot_pole"] / 4,
                          clip_lo=1e-9),
        NormalDomainParam(name="mass_pend_pole",
                          mean=dp_nom["mass_pend_pole"],
                          std=dp_nom["mass_pend_pole"] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name="length_pend_pole",
                          mean=dp_nom["length_pend_pole"],
                          std=dp_nom["length_pend_pole"] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(
            name="damping_pend_pole",
            mean=dp_nom["damping_pend_pole"],
            std=dp_nom["damping_pend_pole"] / 4,
            clip_lo=1e-9,
        ),
    )
Example #3
def get_default_randomizer_qq() -> DomainRandomizer:
    """
    Get the default randomizer for the `QQubeSim`.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.quanser_qube import QQubeSim
    dp_nom = QQubeSim.get_nominal_domain_param()
    return DomainRandomizer(
        NormalDomainParam(name='g',
                          mean=dp_nom['g'],
                          std=dp_nom['g'] / 5,
                          clip_lo=1e-3),
        NormalDomainParam(name='Rm',
                          mean=dp_nom['Rm'],
                          std=dp_nom['Rm'] / 5,
                          clip_lo=1e-3),
        NormalDomainParam(name='km',
                          mean=dp_nom['km'],
                          std=dp_nom['km'] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name='Mr',
                          mean=dp_nom['Mr'],
                          std=dp_nom['Mr'] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name='Lr',
                          mean=dp_nom['Lr'],
                          std=dp_nom['Lr'] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name='Dr',
                          mean=dp_nom['Dr'],
                          std=dp_nom['Dr'] / 5,
                          clip_lo=1e-9),
        NormalDomainParam(name='Mp',
                          mean=dp_nom['Mp'],
                          std=dp_nom['Mp'] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name='Lp',
                          mean=dp_nom['Lp'],
                          std=dp_nom['Lp'] / 5,
                          clip_lo=1e-4),
        NormalDomainParam(name='Dp',
                          mean=dp_nom['Dp'],
                          std=dp_nom['Dp'] / 5,
                          clip_lo=1e-9))
Example #4
"""
Test predefined energy-based controller to make the Quanser Qube swing up.
"""
import torch as to

from pyrado.environments.pysim.quanser_qube import QQubeSim
from pyrado.domain_randomization.utils import print_domain_params
from pyrado.policies.environment_specific import QQubeSwingUpAndBalanceCtrl
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.data_types import RenderMode
from pyrado.utils.input_output import print_cbt


if __name__ == '__main__':
    # Set up environment
    env = QQubeSim(dt=1/500., max_steps=4000)

    # Set up policy
    policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # Simulate
    done, param, state = False, None, None
    while not done:
        ro = rollout(env, policy, render_mode=RenderMode(text=False, video=True), eval=True,
                     reset_kwargs=dict(domain_param=param, init_state=state))
        print_domain_params(env.domain_param)
        print_cbt(f'Return: {ro.undiscounted_return()}', 'g', bright=True)
        done, state, param = after_rollout_query(env, policy, ro)
Example #5
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: [int, None]):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(dt=1 / 100., max_steps=600)
    env = QQubeSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=trial.suggest_categorical(
            'shared_hidden_sizes_policy',
            [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]),
        shared_hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('shared_hidden_nonlin_policy',
                                      ['to_tanh', 'to_relu'])),
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    q_fcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical(
            'hidden_sizes_critic',
            [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]),
        hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('hidden_nonlin_critic',
                                      ['to_tanh', 'to_relu'])),
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
    q_fcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace),
                        **q_fcn_hparam)
    q_fcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace),
                        **q_fcn_hparam)

    # Algorithm
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=100 * env.max_steps,
        min_steps=trial.suggest_categorical(
            'min_steps_algo', [1]),  # , 10, env.max_steps, 10*env.max_steps
        memory_size=trial.suggest_loguniform('memory_size_algo',
                                             1e2 * env.max_steps,
                                             1e4 * env.max_steps),
        tau=trial.suggest_uniform('tau_algo', 0.99, 1.),
        alpha_init=trial.suggest_uniform('alpha_init_algo', 0.1, 0.9),
        learn_alpha=trial.suggest_categorical('learn_alpha_algo',
                                              [True, False]),
        standardize_rew=trial.suggest_categorical('standardize_rew_algo',
                                                  [False]),
        gamma=trial.suggest_uniform('gamma_algo', 0.99, 1.),
        target_update_intvl=trial.suggest_categorical(
            'target_update_intvl_algo', [1, 5]),
        num_batch_updates=trial.suggest_categorical('num_batch_updates_algo',
                                                    [1, 5]),
        batch_size=trial.suggest_categorical('batch_size_algo',
                                             [128, 256, 512]),
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
    )
    csv_logger = create_csv_step_logger(
        osp.join(ex_dir, f'trial_{trial.number}'))
    algo = SAC(ex_dir,
               env,
               policy,
               q_fcn_1,
               q_fcn_2,
               **algo_hparam,
               logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(
        env, policy, num_envs=1,
        min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts

    return mean_ret
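As the note in the docstring says, Optuna only passes the `trial` argument, so the remaining arguments are bound with `functools.partial` before the study is started. Below is a minimal, illustrative launch sketch; the experiment directory, number of trials, and `n_jobs` value are placeholders and not taken from the original script.

# Illustrative sketch of handing the objective above to an Optuna study (placeholder settings)
import functools
import optuna

study = optuna.create_study(direction='maximize')
study.optimize(functools.partial(train_and_eval, ex_dir='path/to/ex_dir', seed=0),
               n_trials=100, n_jobs=4)  # trials run in parallel, matching num_sampler_envs=1 above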
Example #6
from pyrado.policies.features import FeatureStack, identity_feat, sign_feat, abs_feat, squared_feat, qubic_feat, \
    bell_feat, RandFourierFeat, MultFeat
from pyrado.policies.linear import LinearPolicy
import torch as to

if __name__ == '__main__':
    # Experiment (set seed before creating the modules)
    # ex_dir = setup_experiment(QQubeSim.name, PoWER.name, f'{LinearPolicy}_actnorm', seed=1)
    ex_dir = setup_experiment(QQubeSim.name,
                              PoWER.name,
                              QQubeSwingUpAndBalanceCtrl.name,
                              seed=1)

    # Environment
    env_hparams = dict(dt=1 / 500., max_steps=5000)
    env = QQubeSim(**env_hparams)
    # env = ActNormWrapper(env)

    # Policy
    # policy_hparam = dict(
    #     # feats=FeatureStack([RandFourierFeat(env.obs_space.flat_dim, num_feat=20, bandwidth=env.obs_space.bound_up)])
    #     feats=FeatureStack([identity_feat, sign_feat, abs_feat, squared_feat,
    #                         MultFeat([2, 5]), MultFeat([3, 5]), MultFeat([4, 5])])
    # )
    # policy = LinearPolicy(spec=env.spec, **policy_hparam)
    # policy_hparam = dict(energy_gain=0.587, ref_energy=0.827)
    policy_hparam = dict(
        ref_energy=0.02,
        energy_gain=50.,
        energy_th_gain=0.3,  # This parameter is fixed. (requires_grad = False)
        acc_max=5.)
    policy = QQubeSwingUpAndBalanceCtrl(env.spec, **policy_hparam)  # construct the energy-based swing-up controller
Example #7
def default_qq():
    return QQubeSim(dt=0.004, max_steps=4000)  # 250 Hz simulation with a 16 s horizon
Example #8
from pyrado.policies.features import FeatureStack, identity_feat, sign_feat, abs_feat, squared_feat, qubic_feat, \
    bell_feat, MultFeat
from pyrado.policies.linear import LinearPolicy
from pyrado.utils.experiments import wrap_like_other_env

if __name__ == '__main__':
    # Experiment (set seed before creating the modules)
    # ex_dir = setup_experiment(QQubeSim.name, f'{BayRn.name}_{PoWER.name}-sim2sim', '100Hz_lin_dr-Mp+', seed=111)
    ex_dir = setup_experiment(
        QQubeSim.name,
        f'{BayRn.name}_{PoWER.name}-sim2sim',
        f'{QQubeSwingUpAndBalanceCtrl.name}_100Hz_dr-Mp+Mr+',
        seed=1111)

    # Environments
    env_hparams = dict(dt=1 / 100., max_steps=600)
    env_sim = QQubeSim(**env_hparams)
    env_sim = DomainRandWrapperLive(env_sim, get_zero_var_randomizer(env_sim))
    dp_map = get_default_domain_param_map_qq()
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    env_real = QQubeSim(**env_hparams)
    env_real.domain_param = dict(Mp=0.026, Mr=0.097)
    # env_real = QQubeReal(**env_hparams)
    env_real = wrap_like_other_env(env_real, env_sim)

    # Policy
    # policy_hparam = dict(
    #     feats=FeatureStack([identity_feat, sign_feat, abs_feat, squared_feat, qubic_feat,
    #                         MultFeat([2, 5]), MultFeat([3, 5]), MultFeat([4, 5])])
    # )
    # policy = LinearPolicy(spec=env_sim.spec, **policy_hparam)
Example #9
from pyrado.logger.experiment import ask_for_experiment
from pyrado.policies.environment_specific import QQubeSwingUpAndBalanceCtrl
from pyrado.sampling.rollout import rollout, after_rollout_query
from pyrado.utils.argparser import get_argparser
from pyrado.utils.experiments import load_experiment
from pyrado.utils.input_output import print_cbt
from pyrado.utils.data_types import RenderMode
import torch as to
import numpy as np

if __name__ == '__main__':
    # Parse command line arguments
    args = get_argparser().parse_args()

    # Load the environment and the policy
    env = QQubeSim(args.dt, args.max_steps)  # runs infinitely by default

    # policy = QQubeSwingUpAndBalanceCtrl(env.spec)

    # policy = QQubeSwingUpAndBalanceCtrl(
    #     env.spec,
    #     ref_energy=0.04,  # Quanser's value: 0.02
    #     energy_gain=30.,  # Quanser's value: 50
    #     energy_th_gain=0.4,  # former: 0.4
    #     acc_max=5.,  # Quanser's value: 6
    #     alpha_max_pd_enable=10.,  # Quanser's value: 20
    #     pd_gains=to.tensor([-0.42, 18.45, -0.53, 1.53]))

    # QUANSER
    # policy = QQubeSwingUpAndBalanceCtrl(
    #     env.spec,
Example #10
        # param_spec['m_pole'] = np.linspace(0.127*0.7, 0.127*1.3, num=11, endpoint=True)
        # param_spec['l_pole'] = np.linspace(0.641/2*0.7, 0.641/2*1.3, num=11, endpoint=True)

        # Get the experiments' directories to load from
        prefixes = [
            osp.join(pyrado.EXP_DIR, 'FILL_IN', 'FILL_IN'),
        ]
        exp_names = [
            '',
        ]
        exp_labels = [
            '',
        ]

    elif args.env_name == QQubeSim.name:
        env = QQubeSim(dt=args.dt, max_steps=args.max_steps)

        # param_spec['g'] = np.linspace(9.81*0.7, 9.81*1.3, num=11, endpoint=True)
        # param_spec['Rm'] = np.linspace(8.4*0.7, 8.4*1.3, num=11, endpoint=True)
        # param_spec['km'] = np.linspace(0.042*0.7, 0.042*1.3, num=11, endpoint=True)
        # param_spec['Mr'] = np.linspace(0.095*0.7, 0.095*1.3, num=11, endpoint=True)
        # param_spec['Lr'] = np.linspace(0.085*0.7, 0.085*1.3, num=11, endpoint=True)
        # param_spec['Dr'] = np.linspace(5e-6*0.2, 5e-6*5, num=11, endpoint=True)  # 5e-6
        # param_spec['Mp'] = np.linspace(0.024*0.7, 0.024*1.3, num=11, endpoint=True)
        # param_spec['Lp'] = np.linspace(0.129*0.7, 0.129*1.3, num=11, endpoint=True)
        # param_spec['Dp'] = np.linspace(1e-6*0.2, 1e-6*5, num=11, endpoint=True)  # 1e-6

        # Get the experiments' directories to load from
        prefixes = [
            osp.join(pyrado.EXP_DIR, 'FILL_IN', 'FILL_IN'),
        ]
Example #11
        print_cbt(f'Set maximum number of time steps to {args.max_steps}', 'y')

    # ex_dir = input('Enter a root directory that contains one or more experiment directories:\n')
    # Get the experiment's directory to load from
    ex_dir = ask_for_experiment()
    dirs = [x[0] for x in os.walk(ex_dir)][1:]
    num_policies = len(dirs)
    print(f'Found {num_policies} policies.')

    # Specify domain parameters
    param_names = ['Dp', 'Dr', 'Mp', 'Mr', 'Lp', 'Lr']
    num_param = len(param_names)
    num_samples = 10

    # Create one-dim evaluation grid for multiple parameters
    nom_params = QQubeSim.get_nominal_domain_param()
    param_values = dict(
        Dp=np.logspace(-8, -4, num_samples),
        Dr=np.logspace(-8, -4, num_samples),
        Mp=np.linspace(0.6 * nom_params['Mp'], 1.5 * nom_params['Mp'],
                       num_samples),
        Mr=np.linspace(0.6 * nom_params['Mr'], 1.5 * nom_params['Mr'],
                       num_samples),
        Lp=np.linspace(0.6 * nom_params['Lp'], 1.5 * nom_params['Lp'],
                       num_samples),
        Lr=np.linspace(0.6 * nom_params['Lr'], 1.5 * nom_params['Lr'],
                       num_samples),
    )

    # Set up the environment
    env = ActNormWrapper(QQubeSim(dt=1 / 100., max_steps=args.max_steps))
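    # Illustrative sketch (not part of the original snippet): evaluate a loaded policy on the
    # one-dim grids above by passing each value via `reset_kwargs`, as in the swing-up test script.
    # The `policy` object is assumed to have been loaded from one of the directories found earlier.
    returns = {name: [] for name in param_names}
    for name in param_names:
        for value in param_values[name]:
            ro = rollout(env, policy, eval=True,
                         reset_kwargs=dict(domain_param={name: value}))
            returns[name].append(ro.undiscounted_return())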
Example #12
from pyrado.algorithms.bayrn import BayRn
from pyrado.logger.experiment import setup_experiment, save_list_of_dicts_to_yaml
from pyrado.policies.fnn import FNNPolicy
from pyrado.policies.rnn import LSTMPolicy, GRUPolicy
from pyrado.utils.data_types import EnvSpec
from pyrado.utils.experiments import wrap_like_other_env


if __name__ == '__main__':
    # Experiment (set seed before creating the modules)
    ex_dir = setup_experiment(QQubeSim.name, f'{BayRn.name}_{PPO.name}',
                              f'{FNNPolicy.name}_actnorm_dr-Mp-Mr-Lp-Lr', seed=111)

    # Environments
    env_hparams = dict(dt=1/100., max_steps=600)
    env_sim = QQubeSim(**env_hparams)
    env_sim = ActNormWrapper(env_sim)
    env_sim = DomainRandWrapperLive(env_sim, get_zero_var_randomizer(env_sim))
    dp_map = get_default_domain_param_map_qq()
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    env_real = QQubeReal(**env_hparams)
    env_real = wrap_like_other_env(env_real, env_sim)

    # Policy
    policy_hparam = dict(hidden_sizes=[32, 32], hidden_nonlin=to.tanh)  # FNN
    # policy_hparam = dict(hidden_size=32, num_recurrent_layers=1)  # LSTM & GRU
    policy = FNNPolicy(spec=env_sim.spec, **policy_hparam)
    # policy = RNNPolicy(spec=env_sim.spec, **policy_hparam)
    # policy = LSTMPolicy(spec=env_sim.spec, **policy_hparam)
    # policy = GRUPolicy(spec=env_sim.spec, **policy_hparam)
Example #13
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: [int, None]):
    """
    Objective function for the Optuna `Study` to maximize.
    
    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(dt=1/100., max_steps=600)
    env = QQubeSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        hidden_sizes=trial.suggest_categorical('hidden_sizes_policy', [[16, 16], [32, 32], [64, 64]]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_policy', ['to_tanh', 'to_relu'])),
    )  # FNN
    # policy_hparam = dict(
    #     hidden_size=trial.suggest_categorical('hidden_size_policy', [16, 32, 64]),
    #     num_recurrent_layers=trial.suggest_categorical('num_recurrent_layers_policy', [1, 2]),
    # )  # LSTM & GRU
    policy = FNNPolicy(spec=env.spec, **policy_hparam)
    # policy = GRUPolicy(spec=env.spec, **policy_hparam)

    # Critic
    value_fcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical('hidden_sizes_critic', [[16, 16], [32, 32], [64, 64]]),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_critic', ['to_tanh', 'to_relu'])),
    )
    # value_fcn_hparam = dict(
    #     hidden_size=trial.suggest_categorical('hidden_size_critic', [16, 32, 64]),
    #     num_recurrent_layers=trial.suggest_categorical('num_recurrent_layers_critic', [1, 2]),
    # )  # LSTM & GRU
    value_fcn = FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace), **value_fcn_hparam)
    # value_fcn = GRUPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace), **value_fcn_hparam)
    critic_hparam = dict(
        gamma=trial.suggest_uniform('gamma_critic', 0.98, 1.),
        lamda=trial.suggest_uniform('lamda_critic', 0.95, 1.),
        num_epoch=trial.suggest_int('num_epoch_critic', 1, 10),
        batch_size=150,
        lr=trial.suggest_loguniform('lr_critic', 1e-5, 1e-3),
        standardize_adv=trial.suggest_categorical('standardize_adv_critic', [False]),
        max_grad_norm=trial.suggest_categorical('max_grad_norm_critic', [None, 1., 5.]),
        # lr_scheduler=scheduler.StepLR,
        # lr_scheduler_hparam=dict(step_size=10, gamma=0.9)
        # lr_scheduler=scheduler.ExponentialLR,
        # lr_scheduler_hparam=dict(gamma=0.99)
    )
    critic = GAE(value_fcn, **critic_hparam)

    # Algorithm
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=300,
        min_steps=trial.suggest_int('num_rollouts_algo', 10, 30)*env.max_steps,
        num_epoch=trial.suggest_int('num_epoch_algo', 1, 10),
        eps_clip=trial.suggest_uniform('eps_clip_algo', 0.05, 0.2),
        batch_size=150,
        std_init=trial.suggest_uniform('std_init_algo', 0.6, 1.0),
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
        max_grad_norm=trial.suggest_categorical('max_grad_norm_algo', [None, 1., 5.]),
        # lr_scheduler=scheduler.StepLR,
        # lr_scheduler_hparam=dict(step_size=10, gamma=0.9)
        # lr_scheduler=scheduler.ExponentialLR,
        # lr_scheduler_hparam=dict(gamma=0.99)
    )
    csv_logger = create_csv_step_logger(osp.join(ex_dir, f'trial_{trial.number}'))
    algo = PPO(osp.join(ex_dir, f'trial_{trial.number}'), env, policy, critic, **algo_hparam, logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=1, min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros])/min_rollouts

    return mean_ret