def _create_spaces(self):
    # State space (normalized time, since we do not have a simulation)
    self._state_space = BoxSpace(np.array([0.]), np.array([1.]))

    # Action space (PD controller on 3 joint positions and velocities)
    max_act = np.array([
        np.pi, np.pi, np.pi,  # [rad, rad, rad, ...
        10 * np.pi, 10 * np.pi, 10 * np.pi  # ... rad/s, rad/s, rad/s]
    ])
    self._act_space = BoxSpace(-max_act, max_act, labels=[
        r'$q_{1,des}$', r'$q_{3,des}$', r'$q_{5,des}$',
        r'$\dot{q}_{1,des}$', r'$\dot{q}_{3,des}$', r'$\dot{q}_{5,des}$'
    ])

    # Observation space (normalized time)
    self._obs_space = BoxSpace(np.array([0.]), np.array([1.]), labels=['$t$'])
def _create_spaces(self):
    # State space (normalized time, since we do not have a simulation)
    self._state_space = BoxSpace(np.array([0.]), np.array([1.]))

    # Action space (PD controller on joint positions and velocities)
    if self.num_dof == 4:
        self._act_space = act_space_wam_4dof
    elif self.num_dof == 7:
        self._act_space = act_space_wam_7dof

    # Observation space (normalized time)
    self._obs_space = BoxSpace(np.array([0.]), np.array([1.]), labels=['t'])
def skyline(
    dt: Union[int, float, np.ndarray],
    t_end: Union[int, float, np.ndarray],
    t_intvl_space: BoxSpace,
    val_space: BoxSpace,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Step function that randomly samples a value from the given range, and then holds this value for a time interval
    which is also randomly sampled given a range of time intervals. This procedure is repeated until the sequence is
    long enough, i.e. covers `t_end / dt` samples.

    :param dt: time step size
    :param t_end: final time
    :param t_intvl_space: 1-dim `BoxSpace` determining the range of time intervals that can be sampled
    :param val_space: 1-dim `BoxSpace` determining the range of values that can be sampled
    :return: array of time steps together with the associated array of values
    """
    if dt <= 0:
        raise pyrado.ValueErr(given=dt, g_constraint="0")
    if t_end < dt:
        raise pyrado.ValueErr(given=t_end, ge_constraint=f"{dt}")
    if not isinstance(t_intvl_space, BoxSpace):
        raise pyrado.TypeErr(given=t_intvl_space, expected_type=BoxSpace)
    if not isinstance(val_space, BoxSpace):
        raise pyrado.TypeErr(given=val_space, expected_type=BoxSpace)
    if not t_intvl_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=t_intvl_space, expected_match=(1,))
    if not val_space.flat_dim == 1:
        raise pyrado.ShapeErr(given=val_space, expected_match=(1,))

    dt = np.asarray(dt, dtype=np.float32)
    t_end = np.asarray(t_end, dtype=np.float32)

    # First iteration
    t_intvl = t_intvl_space.sample_uniform()
    t_intvl = np.clip(t_intvl, dt, t_end + dt)
    t = np.arange(start=0.0, stop=t_intvl, step=dt)
    vals = val_space.sample_uniform() * np.ones_like(t)

    # Iterate until the time is up
    while t[-1] < t_end:
        t_intvl = t_intvl_space.sample_uniform()
        t_intvl = np.clip(t_intvl, dt, t_end - t[-1] + dt)
        t_new = np.arange(start=t[-1] + dt, stop=t[-1] + t_intvl, step=dt)
        t = np.concatenate([t, t_new])
        val_new = val_space.sample_uniform() * np.ones_like(t_new)
        vals = np.concatenate([vals, val_new])

    return t, vals
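# A minimal usage sketch of `skyline`, mirroring the call signatures used in the tests and
# example scripts further below; the concrete bounds here are arbitrary:
if __name__ == "__main__":
    t, vals = skyline(
        dt=0.02,
        t_end=20.0,
        t_intvl_space=BoxSpace(0.5, 3.0, shape=(1,)),
        val_space=BoxSpace(-2.0, 3.0, shape=(1,)),
    )
    assert len(t) == len(vals)  # one sampled value per time step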
def _create_spaces(self):
    # State space (joint positions and velocities)
    state_shape = (2 * self.num_dof,)
    state_up = np.full(state_shape, pyrado.inf)
    state_lo = np.full(state_shape, -pyrado.inf)
    self._state_space = BoxSpace(state_lo, state_up)

    # Action space (PD controller on joint positions and velocities)
    if self.num_dof == 4:
        self._act_space = act_space_wam_4dof
    elif self.num_dof == 7:
        self._act_space = act_space_wam_7dof

    # Observation space (normalized time)
    self._obs_space = BoxSpace(np.array([0.]), np.array([1.]), labels=['t'])
def obs_space(self) -> Space:
    lo = np.concatenate([WAM_Q_LIMITS_LO_7DOF[:self._num_dof], WAM_QD_LIMITS_LO_7DOF[:self._num_dof]])
    up = np.concatenate([WAM_Q_LIMITS_UP_7DOF[:self._num_dof], WAM_QD_LIMITS_UP_7DOF[:self._num_dof]])
    return BoxSpace(bound_lo=lo, bound_up=up)
def obs_space(self) -> Space:
    # Observation space (normalized time and optionally cup and ball position)
    obs_lo, obs_up, labels = [0.0], [1.0], ["t"]
    if self.observe_ball:
        obs_lo.extend([-3.0, -3.0])
        obs_up.extend([3.0, 3.0])
        labels.extend(["ball_x", "ball_z"])
    if self.observe_cup:
        obs_lo.extend([-3.0, -3.0])
        obs_up.extend([3.0, 3.0])
        labels.extend(["cup_x", "cup_z"])
    return BoxSpace(obs_lo, obs_up, labels=labels)
def test_bayrn_power(ex_dir, env: SimEnv, bayrn_hparam: dict):
    pyrado.set_seed(0)

    # Environments and domain randomization
    env_real = deepcopy(env)
    env_sim = DomainRandWrapperLive(env, create_zero_var_randomizer(env))
    dp_map = create_default_domain_param_map_qq()
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)
    env_real.domain_param = dict(mass_pend_pole=0.024 * 1.1, mass_rot_pole=0.095 * 1.1)
    env_real = wrap_like_other_env(env_real, env_sim)

    # Policy and subroutine
    policy_hparam = dict(energy_gain=0.587, ref_energy=0.827)
    policy = QQubeSwingUpAndBalanceCtrl(env_sim.spec, **policy_hparam)
    subrtn_hparam = dict(
        max_iter=1,
        pop_size=8,
        num_init_states_per_domain=1,
        num_is_samples=4,
        expl_std_init=0.1,
        num_workers=1,
    )
    subrtn = PoWER(ex_dir, env_sim, policy, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = inner_env(env_sim).get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([0.8 * dp_nom["mass_pend_pole"], 1e-8, 0.8 * dp_nom["mass_rot_pole"], 1e-8]),
        bound_up=np.array([1.2 * dp_nom["mass_pend_pole"], 1e-7, 1.2 * dp_nom["mass_rot_pole"], 1e-7]),
    )

    # Create algorithm and train
    algo = BayRn(ex_dir, env_sim, env_real, subrtn, ddp_space, **bayrn_hparam, num_workers=1)
    algo.train()
    assert algo.curr_iter == algo.max_iter or algo.stopping_criterion_met()
def state_space(self) -> Space:
    lo = np.concatenate([WAM_Q_LIMITS_LO_7DOF[:self._num_dof], WAM_QD_LIMITS_LO_7DOF[:self._num_dof]])
    up = np.concatenate([WAM_Q_LIMITS_UP_7DOF[:self._num_dof], WAM_QD_LIMITS_UP_7DOF[:self._num_dof]])
    # The label count must match the sliced bounds, i.e. 2 * num_dof entries
    return BoxSpace(
        bound_lo=lo,
        bound_up=up,
        labels=[f"q_{i}" for i in range(1, self._num_dof + 1)] + [f"qd_{i}" for i in range(1, self._num_dof + 1)],
    )
def __init__(
    self,
    mapping: Dict[int, Tuple[str, str]],
    trafo_mask: Union[list, to.Tensor],
    prior: DomainRandomizer = None,
    scale_params: bool = False,
    use_cuda: bool = False,
):
    """
    Constructor

    :param mapping: mapping from subsequent integers to domain distribution parameters, where the first string of
                    the value tuple is the name of the domain parameter (e.g. mass, length) and the second string
                    is the name of the distribution parameter (e.g. mean, std). The integers are indices of the
                    numpy array which come from the algorithm.
    :param trafo_mask: every domain parameter that is set to `True` in this mask will be learned via a 'virtual'
                       parameter, i.e. in sqrt-space, and then finally squared to retrieve the domain parameter.
                       This transformation is useful to avoid setting a negative variance.
    :param prior: prior belief about the distribution parameters in the form of a `DomainRandomizer`
    :param scale_params: if `True`, the sqrt-transformed policy parameters are scaled in the range of
                         $[-0.5, 0.5]$. The advantage of this is to make the parameter-based exploration easier.
    :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
    """
    if not isinstance(mapping, dict):
        raise pyrado.TypeErr(given=mapping, expected_type=dict)
    if not len(trafo_mask) == len(mapping):
        raise pyrado.ShapeErr(given=trafo_mask, expected_match=mapping)

    self._mapping = mapping
    self.mask = to.tensor(trafo_mask, dtype=to.bool)
    self.prior = prior
    self._scale_params = scale_params

    # Define the parameter space by using the Policy.env_spec.act_space
    param_spec = EnvSpec(obs_space=EmptySpace(), act_space=BoxSpace(-pyrado.inf, pyrado.inf, shape=len(mapping)))

    # Call Policy's constructor
    super().__init__(param_spec, use_cuda)

    self.params = nn.Parameter(to.empty(param_spec.act_space.flat_dim), requires_grad=True)
    if self._scale_params:
        self.param_scaler = None  # initialized during init_param()

    self.init_param(prior=prior)
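# Hypothetical sketch of the inputs expected by the constructor above. The mapping format
# (index -> (domain param name, distribution param name)) matches the dp_map dicts used with
# MetaDomainRandWrapper in the BayRn scripts below; the concrete names are illustrative only:
example_mapping = {0: ("ball_mass", "mean"), 1: ("ball_mass", "std")}
example_trafo_mask = [False, True]  # learn the std via sqrt-space to keep it non-negative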
def state_space(self) -> Space:
    # State space (joint positions and velocities)
    state_lo = np.concatenate([WAM_Q_LIMITS_LO_7DOF[:self._num_dof], WAM_QD_LIMITS_LO_7DOF[:self._num_dof]])
    state_up = np.concatenate([WAM_Q_LIMITS_UP_7DOF[:self._num_dof], WAM_QD_LIMITS_UP_7DOF[:self._num_dof]])

    # Ball and cup (x,y,z)-space
    if self.observe_ball:
        state_lo = np.r_[state_lo, np.full((3,), -3.0)]
        state_up = np.r_[state_up, np.full((3,), 3.0)]
    if self.observe_cup:
        state_lo = np.r_[state_lo, np.full((3,), -3.0)]
        state_up = np.r_[state_up, np.full((3,), 3.0)]

    return BoxSpace(state_lo, state_up)
#     max_iter=300,
#     min_steps=23*env_sim.max_steps,
#     num_epoch=7,
#     eps_clip=0.0744,
#     batch_size=500,
#     std_init=0.9074,
#     lr=3.446e-04,
#     max_grad_norm=1.,
#     num_workers=12,
# )
# subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

# Set the boundaries for the GP
dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
ddp_space = BoxSpace(
    bound_lo=np.array([0.8*dp_nom['Mp'], 1e-12, 0.8*dp_nom['Mr'], 1e-12]),
    bound_up=np.array([1.2*dp_nom['Mp'], 1e-11, 1.2*dp_nom['Mr'], 1e-11])
)

# Algorithm
bayrn_hparam = dict(
    max_iter=15,
    acq_fc='UCB',
    acq_param=dict(beta=0.25),
    acq_restarts=500,
    acq_samples=1000,
    num_init_cand=4,
    warmstart=False,
    num_eval_rollouts_real=10,  # sim-2-sim
    # thold_succ_subrtn=300,
)
env = OneMassOscillatorSim(**env_hparams, task_args=dict(state_des=np.array([0.5, 0])))
env = ActNormWrapper(env)

# Policy
policy_hparam = dict(
    shared_hidden_sizes=[32, 32],
    shared_hidden_nonlin=to.relu,
)
policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

# Critic
qfcn_hparam = dict(
    hidden_sizes=[32, 32],
    hidden_nonlin=to.relu
)
obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
qfcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **qfcn_hparam)
qfcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **qfcn_hparam)

# Algorithm
algo_hparam = dict(
    max_iter=1000*env.max_steps,
    memory_size=100*env.max_steps,
    gamma=0.995,
    num_batch_updates=1,
    tau=0.995,
    ent_coeff_init=0.2,
    learn_ent_coeff=True,
    target_update_intvl=5,
    standardize_rew=False,
    min_steps=1,
)
subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

# Set the boundaries for the GP
dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
ddp_space = BoxSpace(
    bound_lo=np.array([
        0.8 * dp_nom["mass_pend_pole"], dp_nom["mass_pend_pole"] / 5000,
        0.8 * dp_nom["mass_rot_pole"], dp_nom["mass_rot_pole"] / 5000,
        0.8 * dp_nom["length_pend_pole"], dp_nom["length_pend_pole"] / 5000,
        0.8 * dp_nom["length_rot_pole"], dp_nom["length_rot_pole"] / 5000,
    ]),
    bound_up=np.array([
        1.2 * dp_nom["mass_pend_pole"], dp_nom["mass_pend_pole"] / 20,
        1.2 * dp_nom["mass_rot_pole"], dp_nom["mass_rot_pole"] / 20,
        1.2 * dp_nom["length_pend_pole"], dp_nom["length_pend_pole"] / 20,
        1.2 * dp_nom["length_rot_pole"], dp_nom["length_rot_pole"] / 20,
    ]),
)

# policy_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'policy.pt'))
# valuefcn_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'valuefcn.pt'))

# Algorithm
dp_nom = WAMBallInCupSim.get_nominal_domain_param()
ddp_space = BoxSpace(
    bound_lo=np.array([
        0.95 * dp_nom["rope_length"], dp_nom["rope_length"] / 1000,
        0.0 * dp_nom["rope_damping"], dp_nom["rope_damping"] / 100,
        0.85 * dp_nom["ball_mass"], dp_nom["ball_mass"] / 1000,
        0.0 * dp_nom["joint_2_dryfriction"], dp_nom["joint_2_dryfriction"] / 100,
        0.0 * dp_nom["joint_2_damping"], dp_nom["joint_2_damping"] / 100,
    ]),
    bound_up=np.array([
        1.05 * dp_nom["rope_length"], dp_nom["rope_length"] / 20,
        2 * dp_nom["rope_damping"], dp_nom["rope_damping"] / 2,
        1.15 * dp_nom["ball_mass"], dp_nom["ball_mass"] / 10,
        2 * dp_nom["joint_2_dryfriction"], dp_nom["joint_2_dryfriction"] / 2,
        2 * dp_nom["joint_2_damping"], dp_nom["joint_2_damping"] / 2,
    ]),
)
                alpha=0.2)
    plt.scatter(x.data.numpy(), obj_fcn().numpy(), s=16, color=colors[e])
    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.legend()
    plt.show()

    assert noisy_nonlin_fcn(x, f=f, noise_std=noise_std) < noisy_nonlin_fcn(x_init, f=f, noise_std=noise_std)


@pytest.mark.parametrize('dt', [0.1], ids=['0.1'])
@pytest.mark.parametrize('t_end', [6.], ids=['6.'])
@pytest.mark.parametrize(
    't_intvl_space', [
        BoxSpace(0.1, 0.11, shape=1),
        BoxSpace(0.123, 0.456, shape=1),
        BoxSpace(10., 20., shape=1),
    ],
    ids=['small_time_intvl', 'real_time_intvl', 'large_time_intvl'])
@pytest.mark.parametrize('val_space', [BoxSpace(-5., 3., shape=1)], ids=['-5_to_3'])
def test_skyline(dt: Union[int, float], t_end: Union[int, float], t_intvl_space: BoxSpace, val_space: BoxSpace):
    # Create the skyline function
    t, vals = skyline(dt, t_end, t_intvl_space, val_space)
    assert isinstance(t, np.ndarray) and isinstance(vals, np.ndarray)
    assert len(t) == len(vals)


def test_check_prompt():
from pyrado.policies.feed_forward.linear import LinearPolicy
from pyrado.policies.features import *
from pyrado.policies.recurrent.two_headed_rnn import TwoHeadedRNNPolicyBase
from pyrado.sampling.rollout import rollout
from pyrado.sampling.step_sequence import StepSequence
from pyrado.utils.data_sets import TimeSeriesDataSet
from pyrado.utils.data_types import RenderMode
from pyrado.utils.functions import skyline
from pyrado.utils.nn_layers import IndiNonlinLayer
from tests.conftest import m_needs_bullet, m_needs_mujoco, m_needs_rcs, m_needs_libtorch
from tests.environment_wrappers.mock_env import MockEnv


@pytest.fixture(
    scope='function',
    params=[
        (skyline(0.02, 20., BoxSpace(0.5, 3, shape=(1,)), BoxSpace(-2., 3., shape=(1,)))[1], 0.7, 50, False, True),
    ],
    ids=['skyline_0.8_50_notstdized_scaled'])
def tsdataset(request):
    return TimeSeriesDataSet(
        data=to.from_numpy(request.param[0]),
        ratio_train=request.param[1],
        window_size=request.param[2],
        standardize_data=request.param[3],
        scale_min_max_data=request.param[4]
    )


@pytest.mark.features
@pytest.mark.parametrize(
    std_init=0.7573286998997557,
    lr=6.999956625305722e-04,
    max_grad_norm=1.,
    num_workers=8,
    lr_scheduler=lr_scheduler.ExponentialLR,
    lr_scheduler_hparam=dict(gamma=0.999))
subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

# Set the boundaries for the GP
dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
ddp_space = BoxSpace(
    bound_lo=np.array([
        0.8 * dp_nom['Mp'], dp_nom['Mp'] / 5000,
        0.8 * dp_nom['Mr'], dp_nom['Mr'] / 5000,
        0.8 * dp_nom['Lp'], dp_nom['Lp'] / 5000,
        0.8 * dp_nom['Lr'], dp_nom['Lr'] / 5000
    ]),
    bound_up=np.array([
        1.2 * dp_nom['Mp'], dp_nom['Mp'] / 20,
        1.2 * dp_nom['Mr'], dp_nom['Mr'] / 20,
        1.2 * dp_nom['Lp'], dp_nom['Lp'] / 20,
        1.2 * dp_nom['Lr'], dp_nom['Lr'] / 20
    ]))

# policy_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'policy.pt'))
# valuefcn_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'valuefcn.pt'))

# Algorithm
bayrn_hparam = dict(
    thold_succ=300.,
    max_iter=15,
    acq_fc='EI',
    # acq_param=dict(beta=0.2),
def state_space(self) -> Space:
    # Normalized time
    return BoxSpace(np.array([0.0]), np.array([1.0]), labels=["t"])
# Joint position limits [rad], tightened by a 5 deg safety margin
WAM_Q_LIMITS_LO_7DOF = np.array([-2.6, -2.0, -2.8, -0.9, -4.76, -1.6, -3.0]) + 5 * np.pi / 180
WAM_Q_LIMITS_UP_7DOF = np.array([+2.6, +2.0, +2.8, +3.1, +1.24, +1.6, +3.0]) - 5 * np.pi / 180

# Arbitrarily set velocity limits [rad/s]
WAM_QD_LIMITS_LO_7DOF = -np.array([4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi])
WAM_QD_LIMITS_UP_7DOF = +np.array([4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi, 4 * np.pi])

# Torque limits
# 7 DoF
MAX_TORQUE_7DOF = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0, 2.0])
TORQUE_SPACE_WAM_7DOF = BoxSpace(-MAX_TORQUE_7DOF, MAX_TORQUE_7DOF)
# 4 DoF
MAX_TORQUE_4DOF = MAX_TORQUE_7DOF[:4]
TORQUE_SPACE_WAM_4DOF = BoxSpace(-MAX_TORQUE_4DOF, MAX_TORQUE_4DOF)

# Default PD gains from robcom / SL
WAM_PGAINS_7DOF = np.array([200.0, 300.0, 100.0, 100.0, 10.0, 10.0, 2.5])
WAM_DGAINS_7DOF = np.array([7.0, 15.0, 5.0, 2.5, 0.3, 0.3, 0.05])
WAM_PGAINS_4DOF = WAM_PGAINS_7DOF[:4]
WAM_DGAINS_4DOF = WAM_DGAINS_7DOF[:4]

# Desired initial joint angles
# 7 DoF
INIT_QPOS_DES_7DOF = np.array([0.0, 0.5876, 0.0, 1.36, 0.0, -0.321, -1.57])
# 4 DoF arm
INIT_QPOS_DES_4DOF = np.array([0.0, 0.6, 0.0, 1.25])
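# A small sketch of how the torque spaces above can be used: `BoxSpace.sample_uniform()`
# (the same method the skyline function relies on) draws a vector inside the bounds.
# The per-joint shape of the sample is an assumption consistent with the 7-dim bounds:
tau = TORQUE_SPACE_WAM_7DOF.sample_uniform()
assert tau.shape == (7,)  # one torque value per joint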
def state_space(self):
    max_state = np.array([100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0])
    return BoxSpace(-max_state, max_state, labels=["x_1", "x_2", "x_3", "x_4", "x_5", "x_6", "x_7", "x_8"])
# Select the data set
data_set_name = args.mode or "skyline"

# Experiment
ex_dir = setup_experiment(TSPred.name, ADNPolicy.name)

# Set seed if desired
pyrado.set_seed(args.seed, verbose=True)

# Load the data
if data_set_name == "skyline":
    dt = 0.01
    _, vals = skyline(
        dt=dt, t_end=20.0, t_intvl_space=BoxSpace(0.5, 3, shape=(1,)), val_space=BoxSpace(-2.0, 3.0, shape=(1,))
    )
    data = to.from_numpy(vals).to(dtype=to.get_default_dtype()).view(-1, 1)
else:
    data = pd.read_csv(osp.join(pyrado.PERMA_DIR, "misc", f"{data_set_name}.csv"))
    if data_set_name == "daily_min_temperatures":
        data = to.tensor(data["Temp"].values, dtype=to.get_default_dtype()).view(-1, 1)
        dt = 1.0
    elif data_set_name == "monthly_sunspots":
        data = to.tensor(data["Sunspots"].values, dtype=to.get_default_dtype()).view(-1, 1)
        dt = 1.0
    elif "oscillation" in data_set_name:
        data = to.tensor(data["Positions"].values,
    5: ('ball_mass', 'std'),
    6: ('joint_stiction', 'mean'),
    7: ('joint_stiction', 'halfspan'),
    8: ('joint_damping', 'mean'),
    9: ('joint_damping', 'halfspan'),
}
env_sim = MetaDomainRandWrapper(env_sim, dp_map)

# Set the boundaries for the GP (must be consistent with dp_map)
dp_nom = WAMBallInCupSim.get_nominal_domain_param()
ddp_space = BoxSpace(
    bound_lo=np.array([
        0.95*dp_nom['rope_length'], dp_nom['rope_length']/1000,
        0.*dp_nom['rope_damping'], dp_nom['rope_damping']/100,
        0.85*dp_nom['ball_mass'], dp_nom['ball_mass']/1000,
        0.*dp_nom['joint_stiction'], dp_nom['joint_stiction']/100,
        0.*dp_nom['joint_damping'], dp_nom['joint_damping']/100
    ]),
    bound_up=np.array([
        1.05*dp_nom['rope_length'], dp_nom['rope_length']/20,
        2*dp_nom['rope_damping'], dp_nom['rope_damping']/2,
        1.15*dp_nom['ball_mass'], dp_nom['ball_mass']/10,
        2*dp_nom['joint_stiction'], dp_nom['joint_stiction']/2,
        2*dp_nom['joint_damping'], dp_nom['joint_damping']/2
    ])
)

# Setting the ip address to None ensures that robcom does not try to connect to the server pc
env_real_hparams = dict(
    num_dof=4,
    max_steps=1750,
)
env_real = WAMBallInCupRealEpisodic(**env_real_hparams)

# Policy
policy_hparam = dict(
def test_snapshots_notmeta(ex_dir, env: SimEnv, policy, algo_class, algo_hparam):
    # Collect hyper-parameters, create algorithm, and train
    common_hparam = dict(max_iter=1, num_workers=1)
    common_hparam.update(algo_hparam)

    if issubclass(algo_class, ActorCritic):
        common_hparam.update(
            min_rollouts=3,
            critic=GAE(
                vfcn=FNNPolicy(
                    spec=EnvSpec(env.obs_space, ValueFunctionSpace), hidden_sizes=[16, 16], hidden_nonlin=to.tanh
                )
            ),
        )
    elif issubclass(algo_class, ParameterExploring):
        common_hparam.update(num_init_states_per_domain=1)
    elif issubclass(algo_class, (DQL, SAC)):
        common_hparam.update(memory_size=1000, num_updates_per_step=2, gamma=0.99, min_rollouts=1)
        fnn_hparam = dict(hidden_sizes=[8, 8], hidden_nonlin=to.tanh)
        if issubclass(algo_class, DQL):
            # Override the setting
            env = BallOnBeamDiscSim(env.dt, env.max_steps)
            net = FNN(
                input_size=DiscreteActQValPolicy.get_qfcn_input_size(env.spec),
                output_size=DiscreteActQValPolicy.get_qfcn_output_size(),
                **fnn_hparam,
            )
            policy = DiscreteActQValPolicy(spec=env.spec, net=net)
        else:
            # Override the setting
            env = ActNormWrapper(env)
            policy = TwoHeadedGRUPolicy(env.spec, shared_hidden_size=8, shared_num_recurrent_layers=1)
            obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
            common_hparam.update(qfcn_1=FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **fnn_hparam))
            common_hparam.update(qfcn_2=FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **fnn_hparam))
    else:
        raise NotImplementedError

    # Simulate training
    algo = algo_class(ex_dir, env, policy, **common_hparam)
    algo.policy.param_values += to.tensor([42.0])
    if isinstance(algo, ActorCritic):
        algo.critic.vfcn.param_values += to.tensor([42.0])

    # Save and load
    algo.save_snapshot(meta_info=None)
    algo_loaded = Algorithm.load_snapshot(load_dir=ex_dir)
    assert isinstance(algo_loaded, Algorithm)
    policy_loaded = algo_loaded.policy
    if isinstance(algo, ActorCritic):
        critic_loaded = algo_loaded.critic

    # Check
    assert all(algo.policy.param_values == policy_loaded.param_values)
    if isinstance(algo, ActorCritic):
        assert all(algo.critic.vfcn.param_values == critic_loaded.vfcn.param_values)

    # Load the experiment. Since we did not save any hyper-parameters, we ignore the errors when loading.
    env, policy, extra = load_experiment(ex_dir)
    assert isinstance(env, Env)
    assert isinstance(policy, Policy)
    assert isinstance(extra, dict)
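# Note on BoxSpace.cat as used above: it concatenates two box spaces so that the Q-function
# networks receive the stacked [obs, act] vector. A hedged, self-contained sketch (assuming
# flat_dim adds up, which is consistent with how the FNNPolicy input size is derived here):
_obs = BoxSpace(-np.ones(4), np.ones(4))
_act = BoxSpace(-np.ones(1), np.ones(1))
assert BoxSpace.cat([_obs, _act]).flat_dim == _obs.flat_dim + _act.flat_dim  # 4 + 1 = 5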
def default_randomizer():
    return DomainRandomizer(
        NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100),
        UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
        NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1),
        UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True),
        MultivariateNormalDomainParam(name="multidim", mean=10 * to.ones((2,)), cov=2 * to.eye(2), clip_up=11),
    )


# --------------
# Other Fixtures
# --------------


@pytest.fixture(
    scope="function",
    params=[
        (skyline(0.02, 20.0, BoxSpace(0.5, 3, shape=(1,)), BoxSpace(-2.0, 3.0, shape=(1,)))[1], 0.7, 50, False, True),
    ],
    ids=["skyline_0.8_50_notstdized_scaled"],
)
def dataset_ts(request):
    return TimeSeriesDataSet(
        data=to.from_numpy(request.param[0]).to(dtype=to.get_default_dtype()),
        ratio_train=request.param[1],
        window_size=request.param[2],
        standardize_data=request.param[3],
        scale_min_max_data=request.param[4],
    )
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param study_dir: the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(physicsEngine="Bullet", dt=1 / 100.0, max_steps=500)
    env = BallOnPlate2DSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=trial.suggest_categorical(
            "shared_hidden_sizes_policy", [(16, 16), (32, 32), (64, 64), (16, 16, 16), (32, 32, 32)]
        ),
        shared_hidden_nonlin=fcn_from_str(
            trial.suggest_categorical("shared_hidden_nonlin_policy", ["to_tanh", "to_relu"])
        ),
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    qfcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical(
            "hidden_sizes_critic", [(16, 16), (32, 32), (64, 64), (16, 16, 16), (32, 32, 32)]
        ),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical("hidden_nonlin_critic", ["to_tanh", "to_relu"])),
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
    qfcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **qfcn_hparam)
    qfcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **qfcn_hparam)

    # Algorithm
    algo_hparam = dict(
        num_workers=1,  # parallelize via optuna n_jobs
        max_iter=100 * env.max_steps,
        min_steps=trial.suggest_categorical("min_steps_algo", [1]),  # 10, env.max_steps, 10*env.max_steps
        memory_size=trial.suggest_loguniform("memory_size_algo", 1e2 * env.max_steps, 1e4 * env.max_steps),
        tau=trial.suggest_uniform("tau_algo", 0.99, 1.0),
        ent_coeff_init=trial.suggest_uniform("ent_coeff_init_algo", 0.1, 0.9),
        learn_ent_coeff=trial.suggest_categorical("learn_ent_coeff_algo", [True, False]),
        standardize_rew=trial.suggest_categorical("standardize_rew_algo", [False]),
        gamma=trial.suggest_uniform("gamma_algo", 0.99, 1.0),
        target_update_intvl=trial.suggest_categorical("target_update_intvl_algo", [1, 5]),
        num_updates_per_step=trial.suggest_categorical("num_batch_updates_algo", [1, 5]),
        batch_size=trial.suggest_categorical("batch_size_algo", [128, 256, 512]),
        lr=trial.suggest_loguniform("lr_algo", 1e-5, 1e-3),
    )
    csv_logger = create_csv_step_logger(osp.join(study_dir, f"trial_{trial.number}"))
    algo = SAC(study_dir, env, policy, qfcn_1, qfcn_2, **algo_hparam, logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode="latest", seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelRolloutSampler(env, policy, num_workers=1, min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts

    return mean_ret
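# How this objective is typically wired up, as the docstring's functools.partial note suggests.
# A hedged sketch using the standard Optuna API; the trial count and study_dir value are
# illustrative, not taken from this repo:
if __name__ == "__main__":
    import functools

    import optuna

    study = optuna.create_study(direction="maximize")
    study.optimize(functools.partial(train_and_eval, study_dir="study_dir", seed=0), n_trials=10, n_jobs=1)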
if env_name == QBallBalancerSim.name:
    env = QBallBalancerSim(dt=dt)

elif env_name == QCartPoleSwingUpSim.name:
    env = QCartPoleSwingUpSim(dt=dt)

elif env_name == QQubeSwingUpSim.name:
    env = QQubeSwingUpSim(dt=dt)

elif env_name == "wam-bic":
    # Avoid loading mujoco
    from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim

    env = WAMBallInCupSim(num_dof=4)
    env.init_space = BoxSpace(-pyrado.inf, pyrado.inf, shape=env.init_space.shape)

elif env_name == "wam-jsc":
    # Avoid loading mujoco
    from pyrado.environments.mujoco.wam_jsc import WAMJointSpaceCtrlSim

    env = WAMJointSpaceCtrlSim(num_dof=7)
    env.init_space = BoxSpace(-pyrado.inf, pyrado.inf, shape=env.init_space.shape)

elif env_name == "mg-ik":
    # Avoid loading _rcsenv
    from pyrado.environments.rcspysim.mini_golf import MiniGolfIKSim

    env = MiniGolfIKSim(dt=dt)
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: Optional[int]):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(dt=1 / 100., max_steps=600)
    env = QQubeSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=trial.suggest_categorical(
            'shared_hidden_sizes_policy', [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]
        ),
        shared_hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('shared_hidden_nonlin_policy', ['to_tanh', 'to_relu'])
        ),
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    q_fcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical(
            'hidden_sizes_critic', [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]
        ),
        hidden_nonlin=fcn_from_str(trial.suggest_categorical('hidden_nonlin_critic', ['to_tanh', 'to_relu'])),
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
    q_fcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **q_fcn_hparam)
    q_fcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **q_fcn_hparam)

    # Algorithm
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=100 * env.max_steps,
        min_steps=trial.suggest_categorical('min_steps_algo', [1]),  # , 10, env.max_steps, 10*env.max_steps
        memory_size=trial.suggest_loguniform('memory_size_algo', 1e2 * env.max_steps, 1e4 * env.max_steps),
        tau=trial.suggest_uniform('tau_algo', 0.99, 1.),
        alpha_init=trial.suggest_uniform('alpha_init_algo', 0.1, 0.9),
        learn_alpha=trial.suggest_categorical('learn_alpha_algo', [True, False]),
        standardize_rew=trial.suggest_categorical('standardize_rew_algo', [False]),
        gamma=trial.suggest_uniform('gamma_algo', 0.99, 1.),
        target_update_intvl=trial.suggest_categorical('target_update_intvl_algo', [1, 5]),
        num_batch_updates=trial.suggest_categorical('num_batch_updates_algo', [1, 5]),
        batch_size=trial.suggest_categorical('batch_size_algo', [128, 256, 512]),
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
    )
    csv_logger = create_csv_step_logger(osp.join(ex_dir, f'trial_{trial.number}'))
    algo = SAC(ex_dir, env, policy, q_fcn_1, q_fcn_2, **algo_hparam, logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(env, policy, num_envs=1, min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts

    return mean_ret
import numpy as np
import torch as to
from matplotlib import pyplot as plt
from matplotlib import ticker

import pyrado
from pyrado.policies.features import FeatureStack, RBFFeat
from pyrado.policies.linear import LinearPolicy
from pyrado.spaces import BoxSpace
from pyrado.utils.data_types import EnvSpec


if __name__ == '__main__':
    # Define some arbitrary EnvSpec
    obs_space = BoxSpace(bound_lo=np.array([-5., -12.]), bound_up=np.array([10., 6.]))
    act_space = BoxSpace(bound_lo=np.array([-1.]), bound_up=np.array([1.]))
    spec = EnvSpec(obs_space, act_space)

    num_fpd = 5
    num_eval_points = 500

    policy_hparam = dict(
        feats=FeatureStack([RBFFeat(num_feat_per_dim=num_fpd, bounds=obs_space.bounds, scale=None)])
    )
    policy = LinearPolicy(spec, **policy_hparam)

    eval_grid_0 = to.linspace(-5., 10, num_eval_points)
    eval_grid_1 = to.linspace(-12., 6, num_eval_points)
    eval_grid = to.stack([eval_grid_0, eval_grid_1], dim=1)

    feat_vals = to.empty(num_eval_points, num_fpd*obs_space.flat_dim)
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import numpy as np

from pyrado.spaces import BoxSpace


# 4 DoF arm, 2 DoF actuated
init_qpos_des_4dof = np.array([0., 0.6, 0., 1.25])
act_min_wam_4dof = np.array([-1.985, -0.9, -10 * np.pi, -10 * np.pi])
act_max_wam_4dof = np.array([1.985, np.pi, 10 * np.pi, 10 * np.pi])
labels_4dof = ['q_1_des', 'q_3_des', 'q_dot_1_des', 'q_dot_3_des']
act_space_wam_4dof = BoxSpace(act_min_wam_4dof, act_max_wam_4dof, labels=labels_4dof)

# 7 DoF arm, 3 DoF actuated
init_qpos_des_7dof = np.array([0., 0.5876, 0., 1.36, 0., -0.321, -1.57])
act_min_wam_7dof = np.array([-1.985, -0.9, -np.pi / 2, -10 * np.pi, -10 * np.pi, -10 * np.pi])
act_max_wam_7dof = np.array([1.985, np.pi, np.pi / 2, 10 * np.pi, 10 * np.pi, 10 * np.pi])
labels_7dof = ['q_1_des', 'q_3_des', 'q_5_des', 'q_dot_1_des', 'q_dot_3_des', 'q_dot_5_des']
act_space_wam_7dof = BoxSpace(act_min_wam_7dof, act_max_wam_7dof, labels=labels_7dof)
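# Minimal sanity sketch for the 7-DoF action space defined above: a uniformly drawn action
# stays within the specified position/velocity limits (checked directly against the bound
# arrays with numpy, so only `sample_uniform` from the BoxSpace API is assumed):
act = act_space_wam_7dof.sample_uniform()
assert np.all(act_min_wam_7dof <= act) and np.all(act <= act_max_wam_7dof)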
#     num_init_states_per_domain=6,
#     num_is_samples=10,
#     expl_std_init=2.0,
#     expl_std_min=0.02,
#     symm_sampling=False,
#     num_workers=32,
# )
# subrtn = PoWER(ex_dir, env_sim, policy, **subrtn_hparam)

# Set the boundaries for the GP
dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
ddp_space = BoxSpace(
    bound_lo=np.array([0.8 * dp_nom["mass_pend_pole"], 1e-8, 0.8 * dp_nom["mass_rot_pole"], 1e-8]),
    bound_up=np.array([1.2 * dp_nom["mass_pend_pole"], 1e-7, 1.2 * dp_nom["mass_rot_pole"], 1e-7]),
)

# Algorithm
bayrn_hparam = dict(
    max_iter=15,
    acq_fc="UCB",
    acq_param=dict(beta=0.25),
    acq_restarts=500,
    acq_samples=1000,
    num_init_cand=4,
    warmstart=False,
    num_eval_rollouts_real=100,