Example #1
0
    def _create_spaces(self):
        # State space (normalized time, since we do not have a simulation)
        self._state_space = BoxSpace(np.array([0.]), np.array([1.]))

        # Action space (PD controller on 3 joint positions and velocities)
        max_act = np.array([
            np.pi,
            np.pi,
            np.pi,  # [rad, rad, rad, ...
            10 * np.pi,
            10 * np.pi,
            10 * np.pi
        ])  # ... rad/s, rad/s, rad/s]
        self._act_space = BoxSpace(-max_act,
                                   max_act,
                                   labels=[
                                       r'$q_{1,des}$', r'$q_{3,des}$',
                                       r'$q_{5,des}$', r'$\dot{q}_{1,des}$',
                                       r'$\dot{q}_{3,des}$',
                                       r'$\dot{q}_{5,des}$'
                                   ])

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['$t$'])
Example #2
0
    def _create_spaces(self):
        # State space (normalized time, since we do not have a simulation)
        self._state_space = BoxSpace(np.array([0.]), np.array([1.]))

        # Action space (PD controller on joint positions and velocities)
        if self.num_dof == 4:
            self._act_space = act_space_wam_4dof
        elif self.num_dof == 7:
            self._act_space = act_space_wam_7dof

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['t'])
Example #3
0
    def _create_spaces(self):
        # State space (joint positions and velocities)
        state_shape = (2 * self.num_dof, )
        state_up, state_lo = np.full(state_shape, pyrado.inf), np.full(
            state_shape, -pyrado.inf)
        self._state_space = BoxSpace(state_lo, state_up)

        # Action space (PD controller on joint positions and velocities)
        if self.num_dof == 4:
            self._act_space = act_space_wam_4dof
        elif self.num_dof == 7:
            self._act_space = act_space_wam_7dof

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['t'])
Example #4
0
 def obs_space(self) -> Space:
     lo = np.concatenate([
         WAM_Q_LIMITS_LO_7DOF[:self._num_dof],
         WAM_QD_LIMITS_LO_7DOF[:self._num_dof]
     ])
     up = np.concatenate([
         WAM_Q_LIMITS_UP_7DOF[:self._num_dof],
         WAM_QD_LIMITS_UP_7DOF[:self._num_dof]
     ])
     return BoxSpace(bound_lo=lo, bound_up=up)
Example #5
0
    def obs_space(self) -> Space:
        # Observation space (normalized time and optionally cup and ball position)
        obs_lo, obs_up, labels = [0.0], [1.0], ["t"]

        if self.observe_ball:
            obs_lo.extend([-3.0, -3.0])
            obs_up.extend([3.0, 3.0])
            labels.extend(["ball_x", "ball_z"])
        if self.observe_cup:
            obs_lo.extend([-3.0, -3.0])
            obs_up.extend([3.0, 3.0])
            labels.extend(["cup_x", "cup_z"])

        return BoxSpace(obs_lo, obs_up, labels=labels)
Example #6
0
def test_bayrn_power(ex_dir, env: SimEnv, bayrn_hparam: dict):
    pyrado.set_seed(0)

    # Environments and domain randomization
    env_real = deepcopy(env)
    env_sim = DomainRandWrapperLive(env, create_zero_var_randomizer(env))
    dp_map = create_default_domain_param_map_qq()
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)
    env_real.domain_param = dict(mass_pend_pole=0.024 * 1.1,
                                 mass_rot_pole=0.095 * 1.1)
    env_real = wrap_like_other_env(env_real, env_sim)

    # Policy and subroutine
    policy_hparam = dict(energy_gain=0.587, ref_energy=0.827)
    policy = QQubeSwingUpAndBalanceCtrl(env_sim.spec, **policy_hparam)
    subrtn_hparam = dict(
        max_iter=1,
        pop_size=8,
        num_init_states_per_domain=1,
        num_is_samples=4,
        expl_std_init=0.1,
        num_workers=1,
    )
    subrtn = PoWER(ex_dir, env_sim, policy, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = inner_env(env_sim).get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([
            0.8 * dp_nom["mass_pend_pole"], 1e-8,
            0.8 * dp_nom["mass_rot_pole"], 1e-8
        ]),
        bound_up=np.array([
            1.2 * dp_nom["mass_pend_pole"], 1e-7,
            1.2 * dp_nom["mass_rot_pole"], 1e-7
        ]),
    )

    # Create algorithm and train
    algo = BayRn(ex_dir,
                 env_sim,
                 env_real,
                 subrtn,
                 ddp_space,
                 **bayrn_hparam,
                 num_workers=1)
    algo.train()

    assert algo.curr_iter == algo.max_iter or algo.stopping_criterion_met()
Example #7
0
 def state_space(self) -> Space:
     lo = np.concatenate([
         WAM_Q_LIMITS_LO_7DOF[:self._num_dof],
         WAM_QD_LIMITS_LO_7DOF[:self._num_dof]
     ])
     up = np.concatenate([
         WAM_Q_LIMITS_UP_7DOF[:self._num_dof],
         WAM_QD_LIMITS_UP_7DOF[:self._num_dof]
     ])
     return BoxSpace(
         bound_lo=lo,
         bound_up=up,
         labels=[f"q_{i}"
                 for i in range(1, 8)] + [f"qd_{i}" for i in range(1, 8)],
     )
    def __init__(
        self,
        mapping: Dict[int, Tuple[str, str]],
        trafo_mask: Union[list, to.Tensor],
        prior: DomainRandomizer = None,
        scale_params: bool = False,
        use_cuda: bool = False,
    ):
        """
        Constructor

        :param mapping: mapping from subsequent integers to domain distribution parameters, where the first string of
                        the value tuple is the name of the domain parameter (e.g. mass, length) and the second string is
                        the name of the distribution parameter (e.g. mean, std).  The integers are indices of the numpy
                        array which come from the algorithm.
        :param trafo_mask: every domain parameter that is set to `True` in this mask will be learned via a 'virtual'
                           parameter, i.e. in sqrt-space, and then finally squared to retrieve the domain parameter.
                           This transformation is useful to avoid setting a negative variance.
        :param prior: prior believe about the distribution parameters in from of a `DomainRandomizer`
        :param scale_params: if `True`, the sqrt-transformed policy parameters are scaled in the range of $[-0.5, 0.5]$.
                             The advantage of this is to make the parameter-based exploration easier.
        :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
        """
        if not isinstance(mapping, dict):
            raise pyrado.TypeErr(given=mapping, expected_type=dict)
        if not len(trafo_mask) == len(mapping):
            raise pyrado.ShapeErr(given=trafo_mask, expected_match=mapping)

        self._mapping = mapping
        self.mask = to.tensor(trafo_mask, dtype=to.bool)
        self.prior = prior
        self._scale_params = scale_params

        # Define the parameter space by using the Policy.env_spec.act_space
        param_spec = EnvSpec(obs_space=EmptySpace(),
                             act_space=BoxSpace(-pyrado.inf,
                                                pyrado.inf,
                                                shape=len(mapping)))

        # Call Policy's constructor
        super().__init__(param_spec, use_cuda)

        self.params = nn.Parameter(to.empty(param_spec.act_space.flat_dim),
                                   requires_grad=True)
        if self._scale_params:
            self.param_scaler = None  # initialized during init_param()
        self.init_param(prior=prior)
Example #9
0
    def state_space(self) -> Space:
        # State space (joint positions and velocities)
        state_lo = np.concatenate([
            WAM_Q_LIMITS_LO_7DOF[:self._num_dof],
            WAM_QD_LIMITS_LO_7DOF[:self._num_dof]
        ])
        state_up = np.concatenate([
            WAM_Q_LIMITS_UP_7DOF[:self._num_dof],
            WAM_QD_LIMITS_UP_7DOF[:self._num_dof]
        ])

        # Ball and cup (x,y,z)-space
        if self.observe_ball:
            state_lo = np.r_[state_lo, np.full((3, ), -3.0)]
            state_up = np.r_[state_up, np.full((3, ), 3.0)]
        if self.observe_cup:
            state_lo = np.r_[state_lo, np.full((3, ), -3.0)]
            state_up = np.r_[state_up, np.full((3, ), 3.0)]

        return BoxSpace(state_lo, state_up)
Example #10
0
WAM_Q_LIMITS_LO_7DOF = np.array([-2.6, -2.0, -2.8, -0.9, -4.76, -1.6, -3.0
                                 ]) + 5 * np.pi / 180
WAM_Q_LIMITS_UP_7DOF = np.array([+2.6, +2.0, +2.8, +3.1, +1.24, +1.6, +3.0
                                 ]) - 5 * np.pi / 180
# Arbitrarily set velocity limits [rad/s]
WAM_QD_LIMITS_LO_7DOF = -np.array([
    4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi
])
WAM_QD_LIMITS_UP_7DOF = +np.array([
    4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi
])

# Torque limits
# 7 DoF
MAX_TORQUE_7DOF = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0, 2.0])
TORQUE_SPACE_WAM_7DOF = BoxSpace(-MAX_TORQUE_7DOF, MAX_TORQUE_7DOF)
# 4 DoF
MAX_TORQUE_4DOF = MAX_TORQUE_7DOF[:4]
TORQUE_SPACE_WAM_4DOF = BoxSpace(-MAX_TORQUE_4DOF, MAX_TORQUE_4DOF)

# Default PD gains from robcom / SL
WAM_PGAINS_7DOF = np.array([200.0, 300.0, 100.0, 100.0, 10.0, 10.0, 2.5])
WAM_DGAINS_7DOF = np.array([7.0, 15.0, 5.0, 2.5, 0.3, 0.3, 0.05])
WAM_PGAINS_4DOF = WAM_PGAINS_7DOF[:4]
WAM_DGAINS_4DOF = WAM_DGAINS_7DOF[:4]

# Desired initial joint angles
# 7 DoF
INIT_QPOS_DES_7DOF = np.array([0.0, 0.5876, 0.0, 1.36, 0.0, -0.321, -1.57])
# 4 DoF arm
INIT_QPOS_DES_4DOF = np.array([0.0, 0.6, 0.0, 1.25])
Example #11
0
        raise pyrado.ValueErr(
            given=args.env_name,
            eq_constraint=
            f"{QQubeSwingUpReal.name}, {QCartPoleSwingUpReal.name} or {WAMReal.name}"
        )
    print_cbt(f"Set the {env_real.name} environment.", "g")

    # Create the policy
    if args.mode.lower() == "chirp":

        def fcn_of_time(t: float):
            act = max_amp * chirp(t, f0=3, f1=0, t1=t_end, method="linear")
            return act.repeat(env_real.act_space.flat_dim)

    elif args.mode.lower() == "skyline":
        t_intvl_space = BoxSpace(dt, 100 * dt, shape=(1, ))
        val_space = BoxSpace(-abs(max_amp), abs(max_amp), shape=(1, ))
        act_predef = skyline(dt, t_end, t_intvl_space, val_space)[1]

        def fcn_of_time(t: float):
            act = act_predef[int(t / dt)]
            return act.repeat(env_real.act_space.flat_dim)

    elif args.mode.lower() == "sin":

        def fcn_of_time(t: float):
            act = max_amp * np.sin(2 * np.pi * t * 2.0)  # 2 Hz
            return act.repeat(env_real.act_space.flat_dim)

    elif args.mode.lower() == "wam_sin":
        fcn_of_time = wam_jsp_7dof_sin
Example #12
0
from pyrado.policies.feed_forward.linear import LinearPolicy
from pyrado.policies.features import *
from pyrado.policies.recurrent.two_headed_rnn import TwoHeadedRNNPolicyBase
from pyrado.sampling.rollout import rollout
from pyrado.sampling.step_sequence import StepSequence
from pyrado.utils.data_sets import TimeSeriesDataSet
from pyrado.utils.data_types import RenderMode
from pyrado.utils.functions import skyline
from pyrado.utils.nn_layers import IndiNonlinLayer
from tests.conftest import m_needs_bullet, m_needs_mujoco, m_needs_rcs, m_needs_libtorch
from tests.environment_wrappers.mock_env import MockEnv


@pytest.fixture(scope='function',
                params=[
                    (skyline(0.02, 20., BoxSpace(0.5, 3, shape=(1,)), BoxSpace(-2., 3., shape=(1,)))[1],
                     0.7, 50, False, True),
                ],
                ids=['skyline_0.8_50_notstdized_scaled'])
def tsdataset(request):
    return TimeSeriesDataSet(
        data=to.from_numpy(request.param[0]),
        ratio_train=request.param[1],
        window_size=request.param[2],
        standardize_data=request.param[3],
        scale_min_max_data=request.param[4]
    )


@pytest.mark.features
@pytest.mark.parametrize(
    #     max_iter=300,
    #     min_steps=23*env_sim.max_steps,
    #     num_epoch=7,
    #     eps_clip=0.0744,
    #     batch_size=500,
    #     std_init=0.9074,
    #     lr=3.446e-04,
    #     max_grad_norm=1.,
    #     num_workers=12,
    # )
    # subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([0.8*dp_nom['Mp'], 1e-12, 0.8*dp_nom['Mr'], 1e-12]),
        bound_up=np.array([1.2*dp_nom['Mp'], 1e-11, 1.2*dp_nom['Mr'], 1e-11])
    )

    # Algorithm
    bayrn_hparam = dict(
        max_iter=15,
        acq_fc='UCB',
        acq_param=dict(beta=0.25),
        acq_restarts=500,
        acq_samples=1000,
        num_init_cand=4,
        warmstart=False,
        num_eval_rollouts_real=10,  # sim-2-sim
        # thold_succ_subrtn=300,
    )
Example #14
0
import numpy as np
import torch as to
from matplotlib import pyplot as plt
from matplotlib import ticker

import pyrado
from pyrado.policies.features import FeatureStack, RBFFeat
from pyrado.policies.linear import LinearPolicy
from pyrado.spaces import BoxSpace
from pyrado.utils.data_types import EnvSpec


if __name__ == '__main__':
    # Define some arbitrary EnvSpec
    obs_space = BoxSpace(bound_lo=np.array([-5., -12.]), bound_up=np.array([10., 6.]))
    act_space = BoxSpace(bound_lo=np.array([-1.]), bound_up=np.array([1.]))
    spec = EnvSpec(obs_space, act_space)

    num_fpd = 5
    num_eval_points = 500

    policy_hparam = dict(
        feats=FeatureStack([RBFFeat(num_feat_per_dim=num_fpd, bounds=obs_space.bounds, scale=None)])
    )
    policy = LinearPolicy(spec, **policy_hparam)

    eval_grid_0 = to.linspace(-5., 10, num_eval_points)
    eval_grid_1 = to.linspace(-12., 6, num_eval_points)
    eval_grid = to.stack([eval_grid_0, eval_grid_1], dim=1)

    feat_vals = to.empty(num_eval_points, num_fpd*obs_space.flat_dim)
Example #15
0
        plt.plot(x_grid, noisy_nonlin_fcn(x=x_grid, f=f, noise_std=noise_std), alpha=0.2)
        plt.scatter(x.data.numpy(), obj_fcn().numpy(), s=16, color=colors[e])

    plt.xlabel("$x$")
    plt.ylabel("$f(x)$")
    plt.legend()
    plt.show()
    assert noisy_nonlin_fcn(x, f=f, noise_std=noise_std) < noisy_nonlin_fcn(x_init, f=f, noise_std=noise_std)


@pytest.mark.parametrize("dt", [0.1], ids=["0.1"])
@pytest.mark.parametrize("t_end", [6.0], ids=["1."])
@pytest.mark.parametrize(
    "t_intvl_space",
    [
        BoxSpace(0.1, 0.11, shape=1),
        BoxSpace(0.123, 0.456, shape=1),
        BoxSpace(10.0, 20.0, shape=1),
    ],
    ids=["small_time_intvl", "real_time_intvl", "large_time_intvl"],
)
@pytest.mark.parametrize("val_space", [BoxSpace(-5.0, 3.0, shape=1)], ids=["-5_to_3"])
def test_skyline(dt: Union[int, float], t_end: Union[int, float], t_intvl_space: BoxSpace, val_space: BoxSpace):
    # Create the skyline function
    t, vals = skyline(dt, t_end, t_intvl_space, val_space)
    assert isinstance(t, np.ndarray) and isinstance(vals, np.ndarray)
    assert len(t) == len(vals)


def test_check_prompt():
    with completion_context("Works fine", color="g"):
    #     num_init_states_per_domain=6,
    #     num_is_samples=10,
    #     expl_std_init=2.0,
    #     expl_std_min=0.02,
    #     symm_sampling=False,
    #     num_workers=32,
    # )
    # subrtn = PoWER(ex_dir, env_sim, policy, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([
            0.8 * dp_nom["mass_pend_pole"], 1e-8,
            0.8 * dp_nom["mass_rot_pole"], 1e-8
        ]),
        bound_up=np.array([
            1.2 * dp_nom["mass_pend_pole"], 1e-7,
            1.2 * dp_nom["mass_rot_pole"], 1e-7
        ]),
    )

    # Algorithm
    bayrn_hparam = dict(
        max_iter=15,
        acq_fc="UCB",
        acq_param=dict(beta=0.25),
        acq_restarts=500,
        acq_samples=1000,
        num_init_cand=4,
        warmstart=False,
        num_eval_rollouts_real=100,
Example #17
0
                 alpha=0.2)
        plt.scatter(x.data.numpy(), obj_fcn().numpy(), s=16, color=colors[e])

    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.legend()
    plt.show()
    assert noisy_nonlin_fcn(x, f=f, noise_std=noise_std) < noisy_nonlin_fcn(
        x_init, f=f, noise_std=noise_std)


@pytest.mark.parametrize('dt', [0.1], ids=['0.1'])
@pytest.mark.parametrize('t_end', [6.], ids=['1.'])
@pytest.mark.parametrize(
    't_intvl_space', [
        BoxSpace(0.1, 0.11, shape=1),
        BoxSpace(0.123, 0.456, shape=1),
        BoxSpace(10., 20., shape=1),
    ],
    ids=['small_time_intvl', 'real_time_intvl', 'large_time_intvl'])
@pytest.mark.parametrize('val_space', [BoxSpace(-5., 3., shape=1)],
                         ids=['-5_to_3'])
def test_skyline(dt: Union[int, float], t_end: Union[int, float],
                 t_intvl_space: BoxSpace, val_space: BoxSpace):
    # Create the skyline function
    t, vals = skyline(dt, t_end, t_intvl_space, val_space)
    assert isinstance(t, np.ndarray) and isinstance(vals, np.ndarray)
    assert len(t) == len(vals)


def test_check_prompt():
Example #18
0
    if env_name == QBallBalancerSim.name:
        env = QBallBalancerSim(dt=dt)

    elif env_name == QCartPoleSwingUpSim.name:
        env = QCartPoleSwingUpSim(dt=dt)

    elif env_name == QQubeSwingUpSim.name:
        env = QQubeSwingUpSim(dt=dt)

    elif env_name == "wam-bic":  # avoid loading mujoco
        from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim

        env = WAMBallInCupSim(num_dof=4)
        env.init_space = BoxSpace(-pyrado.inf,
                                  pyrado.inf,
                                  shape=env.init_space.shape)

    elif env_name == "wam-jsc":  # avoid loading mujoco
        from pyrado.environments.mujoco.wam_jsc import WAMJointSpaceCtrlSim

        env = WAMJointSpaceCtrlSim(num_dof=7)
        env.init_space = BoxSpace(-pyrado.inf,
                                  pyrado.inf,
                                  shape=env.init_space.shape)

    elif env_name == "mg-ik":  # avoid loading _rcsenv
        from pyrado.environments.rcspysim.mini_golf import MiniGolfIKSim

        env = MiniGolfIKSim(dt=dt)
                         std_init=0.7573286998997557,
                         lr=6.999956625305722e-04,
                         max_grad_norm=1.,
                         num_workers=8,
                         lr_scheduler=lr_scheduler.ExponentialLR,
                         lr_scheduler_hparam=dict(gamma=0.999))
    subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(bound_lo=np.array([
        0.8 * dp_nom['Mp'], dp_nom['Mp'] / 5000, 0.8 * dp_nom['Mr'],
        dp_nom['Mr'] / 5000, 0.8 * dp_nom['Lp'], dp_nom['Lp'] / 5000,
        0.8 * dp_nom['Lr'], dp_nom['Lr'] / 5000
    ]),
                         bound_up=np.array([
                             1.2 * dp_nom['Mp'], dp_nom['Mp'] / 20,
                             1.2 * dp_nom['Mr'], dp_nom['Mr'] / 20,
                             1.2 * dp_nom['Lp'], dp_nom['Lp'] / 20,
                             1.2 * dp_nom['Lr'], dp_nom['Lr'] / 20
                         ]))

    # policy_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'policy.pt'))
    # valuefcn_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'valuefcn.pt'))

    # Algorithm
    bayrn_hparam = dict(
        thold_succ=300.,
        max_iter=15,
        acq_fc='EI',
        # acq_param=dict(beta=0.2),
Example #20
0
    dp_nom = WAMBallInCupSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array(
            [
                0.95 * dp_nom["rope_length"],
                dp_nom["rope_length"] / 1000,
                0.0 * dp_nom["rope_damping"],
                dp_nom["rope_damping"] / 100,
                0.85 * dp_nom["ball_mass"],
                dp_nom["ball_mass"] / 1000,
                0.0 * dp_nom["joint_2_dryfriction"],
                dp_nom["joint_2_dryfriction"] / 100,
                0.0 * dp_nom["joint_2_damping"],
                dp_nom["joint_2_damping"] / 100,
            ]
        ),
        bound_up=np.array(
            [
                1.05 * dp_nom["rope_length"],
                dp_nom["rope_length"] / 20,
                2 * dp_nom["rope_damping"],
                dp_nom["rope_damping"] / 2,
                1.15 * dp_nom["ball_mass"],
                dp_nom["ball_mass"] / 10,
                2 * dp_nom["joint_2_dryfriction"],
                dp_nom["joint_2_dryfriction"] / 2,
                2 * dp_nom["joint_2_damping"],
                dp_nom["joint_2_damping"] / 2,
            ]
        ),
    )
Example #21
0
    )
    subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([
            0.8 * dp_nom["mass_pend_pole"],
            dp_nom["mass_pend_pole"] / 5000,
            0.8 * dp_nom["mass_rot_pole"],
            dp_nom["mass_rot_pole"] / 5000,
            0.8 * dp_nom["length_pend_pole"],
            dp_nom["length_pend_pole"] / 5000,
            0.8 * dp_nom["length_rot_pole"],
            dp_nom["length_rot_pole"] / 5000,
        ]),
        bound_up=np.array([
            1.2 * dp_nom["mass_pend_pole"],
            dp_nom["mass_pend_pole"] / 20,
            1.2 * dp_nom["mass_rot_pole"],
            dp_nom["mass_rot_pole"] / 20,
            1.2 * dp_nom["length_pend_pole"],
            dp_nom["length_pend_pole"] / 20,
            1.2 * dp_nom["length_rot_pole"],
            dp_nom["length_rot_pole"] / 20,
        ]),
    )

    # policy_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'policy.pt'))
    # valuefcn_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'valuefcn.pt'))

    # Algorithm
Example #22
0
 def state_space(self):
     max_state = np.array([100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0])
     return BoxSpace(-max_state, max_state, labels=["x_1", "x_2", "x_2", "x_3", "x_5", "x_6", "x_7", "x_8"])
Example #23
0
 def state_space(self) -> Space:
     # Normalized time
     return BoxSpace(np.array([0.0]), np.array([1.0]), labels=["t"])
Example #24
0
def default_randomizer():
    return DomainRandomizer(
        NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100),
        UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
        NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1),
        UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True),
        MultivariateNormalDomainParam(name="multidim", mean=10 * to.ones((2,)), cov=2 * to.eye(2), clip_up=11),
    )


# --------------
# Other Fixtures
# --------------


@pytest.fixture(
    scope="function",
    params=[
        (skyline(0.02, 20.0, BoxSpace(0.5, 3, shape=(1,)), BoxSpace(-2.0, 3.0, shape=(1,)))[1], 0.7, 50, False, True),
    ],
    ids=["skyline_0.8_50_notstdized_scaled"],
)
def dataset_ts(request):
    return TimeSeriesDataSet(
        data=to.from_numpy(request.param[0]).to(dtype=to.get_default_dtype()),
        ratio_train=request.param[1],
        window_size=request.param[2],
        standardize_data=request.param[3],
        scale_min_max_data=request.param[4],
    )
Example #25
0
    # Select the data set
    data_set_name = args.mode or "skyline"

    # Experiment
    ex_dir = setup_experiment(TSPred.name, ADNPolicy.name)

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Load the data
    if data_set_name == "skyline":
        dt = 0.01
        _, vals = skyline(dt=dt,
                          t_end=20.0,
                          t_intvl_space=BoxSpace(0.5, 3, shape=(1, )),
                          val_space=BoxSpace(-2.0, 3.0, shape=(1, )))
        data = to.from_numpy(vals).to(dtype=to.get_default_dtype()).view(-1, 1)
    else:
        data = pd.read_csv(
            osp.join(pyrado.PERMA_DIR, "misc", f"{data_set_name}.csv"))
        if data_set_name == "daily_min_temperatures":
            data = to.tensor(data["Temp"].values,
                             dtype=to.get_default_dtype()).view(-1, 1)
            dt = 1.0
        elif data_set_name == "monthly_sunspots":
            data = to.tensor(data["Sunspots"].values,
                             dtype=to.get_default_dtype()).view(-1, 1)
            dt = 1.0
        elif "oscillation" in data_set_name:
            data = to.tensor(data["Positions"].values,
        5: ('ball_mass', 'std'),
        6: ('joint_stiction', 'mean'),
        7: ('joint_stiction', 'halfspan'),
        8: ('joint_damping', 'mean'),
        9: ('joint_damping', 'halfspan'),
    }
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    # Set the boundaries for the GP (must be consistent with dp_map)
    dp_nom = WAMBallInCupSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([0.95*dp_nom['rope_length'], dp_nom['rope_length']/1000,
                           0.*dp_nom['rope_damping'], dp_nom['rope_damping']/100,
                           0.85*dp_nom['ball_mass'], dp_nom['ball_mass']/1000,
                           0.*dp_nom['joint_stiction'], dp_nom['joint_stiction']/100,
                           0.*dp_nom['joint_damping'], dp_nom['joint_damping']/100]),
        bound_up=np.array([1.05*dp_nom['rope_length'], dp_nom['rope_length']/20,
                           2*dp_nom['rope_damping'], dp_nom['rope_damping']/2,
                           1.15*dp_nom['ball_mass'], dp_nom['ball_mass']/10,
                           2*dp_nom['joint_stiction'], dp_nom['joint_stiction']/2,
                           2*dp_nom['joint_damping'], dp_nom['joint_damping']/2])
    )

    # Setting the ip address to None ensures that robcom does not try to connect to the server pc
    env_real_hparams = dict(
        num_dof=4,
        max_steps=1750,
    )
    env_real = WAMBallInCupRealEpisodic(**env_real_hparams)

    # Policy
    policy_hparam = dict(
Example #27
0
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import numpy as np

from pyrado.spaces import BoxSpace

# 4 DoF arm, 2 DoF actuated
init_qpos_des_4dof = np.array([0., 0.6, 0., 1.25])
act_min_wam_4dof = np.array([-1.985, -0.9, -10 * np.pi, -10 * np.pi])
act_max_wam_4dof = np.array([1.985, np.pi, 10 * np.pi, 10 * np.pi])
labels_4dof = ['q_1_des', 'q_3_des', 'q_dot_1_des', 'q_dot_3_des']
act_space_wam_4dof = BoxSpace(act_min_wam_4dof,
                              act_max_wam_4dof,
                              labels=labels_4dof)

# 7 DoF arm, 3 DoF actuated
init_qpos_des_7dof = np.array([0., 0.5876, 0., 1.36, 0., -0.321, -1.57])
act_min_wam_7dof = np.array(
    [-1.985, -0.9, -np.pi / 2, -10 * np.pi, -10 * np.pi, -10 * np.pi])
act_max_wam_7dof = np.array(
    [1.985, np.pi, np.pi / 2, 10 * np.pi, 10 * np.pi, 10 * np.pi])
labels_7dof = [
    'q_1_des', 'q_3_des', 'q_5_des', 'q_dot_1_des', 'q_dot_3_des',
    'q_dot_5_des'
]
act_space_wam_7dof = BoxSpace(act_min_wam_7dof,
                              act_max_wam_7dof,
                              labels=labels_7dof)
Example #28
0
    def __call__(self, dp_values: to.Tensor) -> to.Tensor:
        """
        Run one rollout for every domain parameter set. The rollouts are done in segments, and after every segment the
        simulation state is set to the current state in the target domain rollout.

        :param dp_values: tensor containing domain parameters along the 1st dimension
        :return: features computed from the time series data
        """
        dp_values = to.atleast_2d(dp_values).numpy()

        if self.rollouts_real is not None:
            if self.use_rec_act:
                # Create a policy that simply replays the recorded actions
                self._set_action_field(self.rollouts_real)
                policy = PlaybackPolicy(
                    self._env.spec,
                    [ro.get_data_values(self._action_field) for ro in self.rollouts_real],
                    no_reset=True,
                )
            else:
                # Use the current policy to generate the actions
                policy = self._policy

            # The initial states will be set to states which will most likely not the be in the initial state space of
            # the environment, thus we set the initial state space to an infinite space
            self._env.init_space = BoxSpace(
                -pyrado.inf, pyrado.inf, self._env.state_space.shape, labels=self._env.state_space.labels
            )

            data_sim_all = []  # for all target domain rollouts

            # Iterate over domain parameter sets
            for dp_value in dp_values:
                data_sim_one_dp = []  # for all target domain rollouts of one domain parameter set

                # Iterate over target domain rollouts
                for idx_r, ro_real in enumerate(self.rollouts_real):
                    data_one_ro = []
                    ro_real.numpy()

                    # Split the target domain rollout if desired
                    if self.num_segments is not None:
                        segs_real = list(ro_real.split_ordered_batches(num_batches=self.num_segments))
                    else:
                        segs_real = list(ro_real.split_ordered_batches(batch_size=self.len_segments))

                    # Iterate over segments of one target domain rollout
                    cnt_step = 0
                    for seg_real in segs_real:
                        if self.use_rec_act:
                            # Disabled the policy reset of PlaybackPolicy to do it here manually
                            assert policy.no_reset
                            policy.curr_rec = idx_r
                            policy.curr_step = cnt_step

                        # Do the rollout for a segment
                        seg_sim = rollout(
                            self._env,
                            policy,
                            eval=True,
                            reset_kwargs=dict(
                                init_state=seg_real.states[0, :], domain_param=dict(zip(self.dp_names, dp_value))
                            ),
                            stop_on_done=self.stop_on_done,
                            max_steps=seg_real.length,
                        )
                        check_domain_params(seg_sim, dp_value, self.dp_names)
                        if self.use_rec_act:
                            check_act_equal(seg_real, seg_sim, check_applied=self._action_field == "actions_applied")

                        # Pad if necessary
                        StepSequence.pad(seg_sim, seg_real.length)

                        # Increase step counter for next segment
                        cnt_step += seg_real.length

                        # Concatenate states and actions of the simulated and real segments
                        data_one_seg = np.concatenate(
                            [
                                seg_sim.states[: len(seg_real), :],
                                seg_sim.get_data_values(self._action_field)[: len(seg_real), :],
                            ],
                            axis=1,
                        )
                        if self._embedding.requires_target_domain_data:
                            # The embedding is also using target domain data (the case for DTW distance)
                            data_one_seg_real = np.concatenate(
                                [seg_real.states[: len(seg_real), :], seg_real.get_data_values(self._action_field)],
                                axis=1,
                            )
                            data_one_seg = np.concatenate([data_one_seg, data_one_seg_real], axis=1)
                        data_one_seg = to.from_numpy(data_one_seg).to(dtype=to.get_default_dtype())
                        data_one_ro.append(data_one_seg)

                    # Append one simulated rollout
                    data_sim_one_dp.append(to.cat(data_one_ro, dim=0))

                # Append the segments of all target domain rollouts for the current domain parameter
                data_sim_all.append(to.stack(data_sim_one_dp, dim=0))

            # Compute the features from all time series
            data_sim_all = to.stack(data_sim_all, dim=0)  # shape [batch_size, num_rollouts, len_time_series, dim_data]
            data_sim_all = self._embedding(Embedding.pack(data_sim_all))  # shape [batch_size, num_rollouts * dim_data]

            # Check shape
            if data_sim_all.shape != (dp_values.shape[0], len(self.rollouts_real) * self._embedding.dim_output):
                raise pyrado.ShapeErr(
                    given=data_sim_all,
                    expected_match=(dp_values.shape[0], len(self.rollouts_real) * self._embedding.dim_output),
                )

        else:
            # There are no pre-recorded rollouts, e.g. during _setup_sbi().
            # Use the current policy yo generate the actions.
            policy = self._policy

            # Do the rollouts
            data_sim_all = []
            for dp_value in dp_values:
                ro_sim = rollout(
                    self._env,
                    policy,
                    eval=True,
                    reset_kwargs=dict(domain_param=dict(zip(self.dp_names, dp_value))),
                    stop_on_done=self.stop_on_done,
                )
                check_domain_params(ro_sim, dp_value, self.dp_names)

                # Pad if necessary
                StepSequence.pad(ro_sim, self._env.max_steps)

                # Concatenate states and actions of the simulated segments
                data_one_seg = np.concatenate(
                    [ro_sim.states[:-1, :], ro_sim.get_data_values(self._action_field)], axis=1
                )
                if self._embedding.requires_target_domain_data:
                    data_one_seg = np.concatenate([data_one_seg, data_one_seg], axis=1)
                data_one_seg = to.from_numpy(data_one_seg).to(dtype=to.get_default_dtype())
                data_sim_all.append(data_one_seg)

            # Compute the features from all time series
            data_sim_all = to.stack(data_sim_all, dim=0)
            data_sim_all = data_sim_all.unsqueeze(1)  # equivalent to only one target domain rollout
            data_sim_all = self._embedding(Embedding.pack(data_sim_all))  # shape [batch_size,  dim_feat]

            # Check shape
            if data_sim_all.shape != (dp_values.shape[0], self._embedding.dim_output):
                raise pyrado.ShapeErr(
                    given=data_sim_all, expected_match=(dp_values.shape[0], self._embedding.dim_output)
                )

        return data_sim_all  # shape [batch_size, num_rollouts * dim_feat]