Example 1
    def _create_spaces(self):
        # State space (normalized time, since we do not have a simulation)
        self._state_space = BoxSpace(np.array([0.]), np.array([1.]))

        # Action space (PD controller on 3 joint positions and velocities)
        max_act = np.array([
            np.pi, np.pi, np.pi,  # [rad, rad, rad, ...
            10 * np.pi, 10 * np.pi, 10 * np.pi  # ... rad/s, rad/s, rad/s]
        ])
        self._act_space = BoxSpace(-max_act,
                                   max_act,
                                   labels=[
                                       r'$q_{1,des}$', r'$q_{3,des}$',
                                       r'$q_{5,des}$', r'$\dot{q}_{1,des}$',
                                       r'$\dot{q}_{3,des}$',
                                       r'$\dot{q}_{5,des}$'
                                   ])

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['$t$'])
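
As a quick sanity check, here is a minimal usage sketch (an addition for illustration, not part of the original example) that rebuilds the 6-dim action space from above and draws a random action from it, using only the BoxSpace API visible in these examples:

import numpy as np
from pyrado.spaces import BoxSpace

max_act = np.array([np.pi, np.pi, np.pi, 10 * np.pi, 10 * np.pi, 10 * np.pi])
act_space = BoxSpace(-max_act, max_act)

act = act_space.sample_uniform()  # one uniformly random action within the bounds
assert act_space.flat_dim == 6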
Example 2
    def _create_spaces(self):
        # State space (normalized time, since we do not have a simulation)
        self._state_space = BoxSpace(np.array([0.]), np.array([1.]))

        # Action space (PD controller on joint positions and velocities)
        if self.num_dof == 4:
            self._act_space = act_space_wam_4dof
        elif self.num_dof == 7:
            self._act_space = act_space_wam_7dof

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['t'])
Example 3
def skyline(
    dt: Union[int, float, np.ndarray],
    t_end: Union[int, float, np.ndarray],
    t_intvl_space: BoxSpace,
    val_space: BoxSpace,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Step function that samples a value uniformly at random from the given range and holds it for a time interval,
    which is in turn sampled uniformly from a given range of time intervals. This procedure is repeated until the
    sequence is long enough, i.e. at least `t_end / dt` samples.

    :param dt: time step size
    :param t_end: final time
    :param t_intvl_space: 1-dim `BoxSpace` determining the range of time intervals that can be sampled
    :param val_space: 1-dim `BoxSpace` determining the range of values that can be sampled
    :return: array of time steps together with the associated array of values
    """
    if dt <= 0:
        raise pyrado.ValueErr(given=dt, g_constraint="0")
    if t_end < dt:
        raise pyrado.ValueErr(given=t_end, ge_constraint=f"{dt}")
    if not isinstance(t_intvl_space, BoxSpace):
        raise pyrado.TypeErr(given=t_intvl_space, expected_type=BoxSpace)
    if not isinstance(val_space, BoxSpace):
        raise pyrado.TypeErr(given=val_space, expected_type=BoxSpace)
    if t_intvl_space.flat_dim != 1:
        raise pyrado.ShapeErr(given=t_intvl_space, expected_match=(1,))
    if val_space.flat_dim != 1:
        raise pyrado.ShapeErr(given=val_space, expected_match=(1,))

    dt = np.asarray(dt, dtype=np.float32)
    t_end = np.asarray(t_end, dtype=np.float32)

    # First iter
    t_intvl = t_intvl_space.sample_uniform()
    t_intvl = np.clip(t_intvl, dt, t_end + dt)
    t = np.arange(start=0.0, stop=t_intvl, step=dt)
    vals = val_space.sample_uniform() * np.ones_like(t)

    # Iterate until the time is up
    while t[-1] < t_end:
        t_intvl = t_intvl_space.sample_uniform()
        t_intvl = np.clip(t_intvl, dt, t_end - t[-1] + dt)
        t_new = np.arange(start=t[-1] + dt, stop=t[-1] + t_intvl, step=dt)
        t = np.concatenate([t, t_new])
        val_new = val_space.sample_uniform() * np.ones_like(t_new)
        vals = np.concatenate([vals, val_new])

    return t, vals
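
A short usage sketch of `skyline` (added for illustration; the spaces mirror the test fixture further below):

t_intvl_space = BoxSpace(0.5, 3.0, shape=(1,))
val_space = BoxSpace(-2.0, 3.0, shape=(1,))
t, vals = skyline(dt=0.01, t_end=20.0, t_intvl_space=t_intvl_space, val_space=val_space)
assert len(t) == len(vals)  # one piecewise-constant value per time step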
Example 4
    def _create_spaces(self):
        # State space (joint positions and velocities)
        state_shape = (2 * self.num_dof,)
        state_lo = np.full(state_shape, -pyrado.inf)
        state_up = np.full(state_shape, pyrado.inf)
        self._state_space = BoxSpace(state_lo, state_up)

        # Action space (PD controller on joint positions and velocities)
        if self.num_dof == 4:
            self._act_space = act_space_wam_4dof
        elif self.num_dof == 7:
            self._act_space = act_space_wam_7dof

        # Observation space (normalized time)
        self._obs_space = BoxSpace(np.array([0.]),
                                   np.array([1.]),
                                   labels=['t'])
Example 5
 def obs_space(self) -> Space:
     lo = np.concatenate([
         WAM_Q_LIMITS_LO_7DOF[:self._num_dof],
         WAM_QD_LIMITS_LO_7DOF[:self._num_dof]
     ])
     up = np.concatenate([
         WAM_Q_LIMITS_UP_7DOF[:self._num_dof],
         WAM_QD_LIMITS_UP_7DOF[:self._num_dof]
     ])
     return BoxSpace(bound_lo=lo, bound_up=up)
Example 6
    def obs_space(self) -> Space:
        # Observation space (normalized time and optionally cup and ball position)
        obs_lo, obs_up, labels = [0.0], [1.0], ["t"]

        if self.observe_ball:
            obs_lo.extend([-3.0, -3.0])
            obs_up.extend([3.0, 3.0])
            labels.extend(["ball_x", "ball_z"])
        if self.observe_cup:
            obs_lo.extend([-3.0, -3.0])
            obs_up.extend([3.0, 3.0])
            labels.extend(["cup_x", "cup_z"])

        return BoxSpace(obs_lo, obs_up, labels=labels)
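
For illustration (not part of the original snippet), with both flags enabled the property returns the 5-dim box below; bounds and labels are taken verbatim from the code above:

obs_space = BoxSpace(
    [0.0, -3.0, -3.0, -3.0, -3.0],
    [1.0, 3.0, 3.0, 3.0, 3.0],
    labels=["t", "ball_x", "ball_z", "cup_x", "cup_z"],
)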
Example 7
def test_bayrn_power(ex_dir, env: SimEnv, bayrn_hparam: dict):
    pyrado.set_seed(0)

    # Environments and domain randomization
    env_real = deepcopy(env)
    env_sim = DomainRandWrapperLive(env, create_zero_var_randomizer(env))
    dp_map = create_default_domain_param_map_qq()
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)
    env_real.domain_param = dict(mass_pend_pole=0.024 * 1.1,
                                 mass_rot_pole=0.095 * 1.1)
    env_real = wrap_like_other_env(env_real, env_sim)

    # Policy and subroutine
    policy_hparam = dict(energy_gain=0.587, ref_energy=0.827)
    policy = QQubeSwingUpAndBalanceCtrl(env_sim.spec, **policy_hparam)
    subrtn_hparam = dict(
        max_iter=1,
        pop_size=8,
        num_init_states_per_domain=1,
        num_is_samples=4,
        expl_std_init=0.1,
        num_workers=1,
    )
    subrtn = PoWER(ex_dir, env_sim, policy, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = inner_env(env_sim).get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([
            0.8 * dp_nom["mass_pend_pole"], 1e-8,
            0.8 * dp_nom["mass_rot_pole"], 1e-8
        ]),
        bound_up=np.array([
            1.2 * dp_nom["mass_pend_pole"], 1e-7,
            1.2 * dp_nom["mass_rot_pole"], 1e-7
        ]),
    )

    # Create algorithm and train
    algo = BayRn(ex_dir,
                 env_sim,
                 env_real,
                 subrtn,
                 ddp_space,
                 **bayrn_hparam,
                 num_workers=1)
    algo.train()

    assert algo.curr_iter == algo.max_iter or algo.stopping_criterion_met()
Example 8
 def state_space(self) -> Space:
     lo = np.concatenate([
         WAM_Q_LIMITS_LO_7DOF[:self._num_dof],
         WAM_QD_LIMITS_LO_7DOF[:self._num_dof]
     ])
     up = np.concatenate([
         WAM_Q_LIMITS_UP_7DOF[:self._num_dof],
         WAM_QD_LIMITS_UP_7DOF[:self._num_dof]
     ])
     # Only label the first num_dof joints so that the labels match the bounds
     return BoxSpace(
         bound_lo=lo,
         bound_up=up,
         labels=[f"q_{i}" for i in range(1, self._num_dof + 1)]
         + [f"qd_{i}" for i in range(1, self._num_dof + 1)],
     )
Example 9
    def __init__(
        self,
        mapping: Dict[int, Tuple[str, str]],
        trafo_mask: Union[list, to.Tensor],
        prior: DomainRandomizer = None,
        scale_params: bool = False,
        use_cuda: bool = False,
    ):
        """
        Constructor

        :param mapping: mapping from subsequent integers to domain distribution parameters, where the first string of
                        the value tuple is the name of the domain parameter (e.g. mass, length) and the second string
                        is the name of the distribution parameter (e.g. mean, std). The integers are indices into the
                        numpy array that comes from the algorithm.
        :param trafo_mask: every domain parameter that is set to `True` in this mask will be learned via a 'virtual'
                           parameter, i.e. in sqrt-space, and then finally squared to retrieve the domain parameter.
                           This transformation is useful to avoid setting a negative variance.
        :param prior: prior belief about the distribution parameters in form of a `DomainRandomizer`
        :param scale_params: if `True`, the sqrt-transformed policy parameters are scaled in the range of $[-0.5, 0.5]$.
                             The advantage of this is to make the parameter-based exploration easier.
        :param use_cuda: `True` to move the policy to the GPU, `False` (default) to use the CPU
        """
        if not isinstance(mapping, dict):
            raise pyrado.TypeErr(given=mapping, expected_type=dict)
        if not len(trafo_mask) == len(mapping):
            raise pyrado.ShapeErr(given=trafo_mask, expected_match=mapping)

        self._mapping = mapping
        self.mask = to.tensor(trafo_mask, dtype=to.bool)
        self.prior = prior
        self._scale_params = scale_params

        # Define the parameter space by using the Policy.env_spec.act_space
        param_spec = EnvSpec(obs_space=EmptySpace(),
                             act_space=BoxSpace(-pyrado.inf,
                                                pyrado.inf,
                                                shape=len(mapping)))

        # Call Policy's constructor
        super().__init__(param_spec, use_cuda)

        self.params = nn.Parameter(to.empty(param_spec.act_space.flat_dim),
                                   requires_grad=True)
        if self._scale_params:
            self.param_scaler = None  # initialized during init_param()
        self.init_param(prior=prior)
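
A hedged sketch of the constructor arguments described in the docstring above; the format mirrors the dp_map dictionaries in later examples, and the concrete parameter names are illustrative assumptions:

mapping = {0: ("mass", "mean"), 1: ("mass", "std")}  # indices match the algorithm's parameter vector
trafo_mask = [False, True]  # sqrt-space trafo only for the variance-like entry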
Example 10
    def state_space(self) -> Space:
        # State space (joint positions and velocities)
        state_lo = np.concatenate([
            WAM_Q_LIMITS_LO_7DOF[:self._num_dof],
            WAM_QD_LIMITS_LO_7DOF[:self._num_dof]
        ])
        state_up = np.concatenate([
            WAM_Q_LIMITS_UP_7DOF[:self._num_dof],
            WAM_QD_LIMITS_UP_7DOF[:self._num_dof]
        ])

        # Ball and cup (x,y,z)-space
        if self.observe_ball:
            state_lo = np.r_[state_lo, np.full((3, ), -3.0)]
            state_up = np.r_[state_up, np.full((3, ), 3.0)]
        if self.observe_cup:
            state_lo = np.r_[state_lo, np.full((3, ), -3.0)]
            state_up = np.r_[state_up, np.full((3, ), 3.0)]

        return BoxSpace(state_lo, state_up)
Example 11
    #     max_iter=300,
    #     min_steps=23*env_sim.max_steps,
    #     num_epoch=7,
    #     eps_clip=0.0744,
    #     batch_size=500,
    #     std_init=0.9074,
    #     lr=3.446e-04,
    #     max_grad_norm=1.,
    #     num_workers=12,
    # )
    # subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([0.8*dp_nom['Mp'], 1e-12, 0.8*dp_nom['Mr'], 1e-12]),
        bound_up=np.array([1.2*dp_nom['Mp'], 1e-11, 1.2*dp_nom['Mr'], 1e-11])
    )

    # Algorithm
    bayrn_hparam = dict(
        max_iter=15,
        acq_fc='UCB',
        acq_param=dict(beta=0.25),
        acq_restarts=500,
        acq_samples=1000,
        num_init_cand=4,
        warmstart=False,
        num_eval_rollouts_real=10,  # sim-2-sim
        # thold_succ_subrtn=300,
    )
Example 12
    env = OneMassOscillatorSim(**env_hparams, task_args=dict(state_des=np.array([0.5, 0])))
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=[32, 32],
        shared_hidden_nonlin=to.relu,
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    qfcn_hparam = dict(
        hidden_sizes=[32, 32],
        hidden_nonlin=to.relu
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
    qfcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **qfcn_hparam)
    qfcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace), **qfcn_hparam)

    # Algorithm
    algo_hparam = dict(
        max_iter=1000*env.max_steps,
        memory_size=100*env.max_steps,
        gamma=0.995,
        num_batch_updates=1,
        tau=0.995,
        ent_coeff_init=0.2,
        learn_ent_coeff=True,
        target_update_intvl=5,
        standardize_rew=False,
        min_steps=1,
Example 13
    )
    subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([
            0.8 * dp_nom["mass_pend_pole"],
            dp_nom["mass_pend_pole"] / 5000,
            0.8 * dp_nom["mass_rot_pole"],
            dp_nom["mass_rot_pole"] / 5000,
            0.8 * dp_nom["length_pend_pole"],
            dp_nom["length_pend_pole"] / 5000,
            0.8 * dp_nom["length_rot_pole"],
            dp_nom["length_rot_pole"] / 5000,
        ]),
        bound_up=np.array([
            1.2 * dp_nom["mass_pend_pole"],
            dp_nom["mass_pend_pole"] / 20,
            1.2 * dp_nom["mass_rot_pole"],
            dp_nom["mass_rot_pole"] / 20,
            1.2 * dp_nom["length_pend_pole"],
            dp_nom["length_pend_pole"] / 20,
            1.2 * dp_nom["length_rot_pole"],
            dp_nom["length_rot_pole"] / 20,
        ]),
    )

    # policy_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'policy.pt'))
    # valuefcn_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'valuefcn.pt'))

    # Algorithm
Example 14
    dp_nom = WAMBallInCupSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array(
            [
                0.95 * dp_nom["rope_length"],
                dp_nom["rope_length"] / 1000,
                0.0 * dp_nom["rope_damping"],
                dp_nom["rope_damping"] / 100,
                0.85 * dp_nom["ball_mass"],
                dp_nom["ball_mass"] / 1000,
                0.0 * dp_nom["joint_2_dryfriction"],
                dp_nom["joint_2_dryfriction"] / 100,
                0.0 * dp_nom["joint_2_damping"],
                dp_nom["joint_2_damping"] / 100,
            ]
        ),
        bound_up=np.array(
            [
                1.05 * dp_nom["rope_length"],
                dp_nom["rope_length"] / 20,
                2 * dp_nom["rope_damping"],
                dp_nom["rope_damping"] / 2,
                1.15 * dp_nom["ball_mass"],
                dp_nom["ball_mass"] / 10,
                2 * dp_nom["joint_2_dryfriction"],
                dp_nom["joint_2_dryfriction"] / 2,
                2 * dp_nom["joint_2_damping"],
                dp_nom["joint_2_damping"] / 2,
            ]
        ),
    )
Example 15
                 alpha=0.2)
        plt.scatter(x.data.numpy(), obj_fcn().numpy(), s=16, color=colors[e])

    plt.xlabel('$x$')
    plt.ylabel('$f(x)$')
    plt.legend()
    plt.show()
    assert noisy_nonlin_fcn(x, f=f, noise_std=noise_std) < noisy_nonlin_fcn(
        x_init, f=f, noise_std=noise_std)


@pytest.mark.parametrize('dt', [0.1], ids=['0.1'])
@pytest.mark.parametrize('t_end', [6.], ids=['6.'])
@pytest.mark.parametrize(
    't_intvl_space', [
        BoxSpace(0.1, 0.11, shape=1),
        BoxSpace(0.123, 0.456, shape=1),
        BoxSpace(10., 20., shape=1),
    ],
    ids=['small_time_intvl', 'real_time_intvl', 'large_time_intvl'])
@pytest.mark.parametrize('val_space', [BoxSpace(-5., 3., shape=1)],
                         ids=['-5_to_3'])
def test_skyline(dt: Union[int, float], t_end: Union[int, float],
                 t_intvl_space: BoxSpace, val_space: BoxSpace):
    # Create the skyline function
    t, vals = skyline(dt, t_end, t_intvl_space, val_space)
    assert isinstance(t, np.ndarray) and isinstance(vals, np.ndarray)
    assert len(t) == len(vals)


def test_check_prompt():
Example 16
from pyrado.policies.feed_forward.linear import LinearPolicy
from pyrado.policies.features import *
from pyrado.policies.recurrent.two_headed_rnn import TwoHeadedRNNPolicyBase
from pyrado.sampling.rollout import rollout
from pyrado.sampling.step_sequence import StepSequence
from pyrado.utils.data_sets import TimeSeriesDataSet
from pyrado.utils.data_types import RenderMode
from pyrado.utils.functions import skyline
from pyrado.utils.nn_layers import IndiNonlinLayer
from tests.conftest import m_needs_bullet, m_needs_mujoco, m_needs_rcs, m_needs_libtorch
from tests.environment_wrappers.mock_env import MockEnv


@pytest.fixture(scope='function',
                params=[
                    (skyline(0.02, 20., BoxSpace(0.5, 3, shape=(1,)), BoxSpace(-2., 3., shape=(1,)))[1],
                     0.7, 50, False, True),
                ],
                ids=['skyline_0.8_50_notstdized_scaled'])
def tsdataset(request):
    return TimeSeriesDataSet(
        data=to.from_numpy(request.param[0]),
        ratio_train=request.param[1],
        window_size=request.param[2],
        standardize_data=request.param[3],
        scale_min_max_data=request.param[4]
    )


@pytest.mark.features
@pytest.mark.parametrize(
Example 17
                         std_init=0.7573286998997557,
                         lr=6.999956625305722e-04,
                         max_grad_norm=1.,
                         num_workers=8,
                         lr_scheduler=lr_scheduler.ExponentialLR,
                         lr_scheduler_hparam=dict(gamma=0.999))
    subrtn = PPO(ex_dir, env_sim, policy, critic, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([
            0.8 * dp_nom['Mp'], dp_nom['Mp'] / 5000, 0.8 * dp_nom['Mr'], dp_nom['Mr'] / 5000,
            0.8 * dp_nom['Lp'], dp_nom['Lp'] / 5000, 0.8 * dp_nom['Lr'], dp_nom['Lr'] / 5000
        ]),
        bound_up=np.array([
            1.2 * dp_nom['Mp'], dp_nom['Mp'] / 20, 1.2 * dp_nom['Mr'], dp_nom['Mr'] / 20,
            1.2 * dp_nom['Lp'], dp_nom['Lp'] / 20, 1.2 * dp_nom['Lr'], dp_nom['Lr'] / 20
        ]))

    # policy_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'policy.pt'))
    # valuefcn_init = to.load(osp.join(pyrado.EXP_DIR, QQubeSwingUpSim.name, PPO.name, 'EXP_NAME', 'valuefcn.pt'))

    # Algorithm
    bayrn_hparam = dict(
        thold_succ=300.,
        max_iter=15,
        acq_fc='EI',
        # acq_param=dict(beta=0.2),
Example 18
 def state_space(self) -> Space:
     # Normalized time
     return BoxSpace(np.array([0.0]), np.array([1.0]), labels=["t"])
Example 19
# Joint position limits [rad], tightened by a 5 deg safety margin
WAM_Q_LIMITS_LO_7DOF = np.array([-2.6, -2.0, -2.8, -0.9, -4.76, -1.6, -3.0]) + 5 * np.pi / 180
WAM_Q_LIMITS_UP_7DOF = np.array([+2.6, +2.0, +2.8, +3.1, +1.24, +1.6, +3.0]) - 5 * np.pi / 180
# Arbitrarily set velocity limits [rad/s]
WAM_QD_LIMITS_LO_7DOF = -4 * np.pi * np.ones(7)
WAM_QD_LIMITS_UP_7DOF = +4 * np.pi * np.ones(7)

# Torque limits
# 7 DoF
MAX_TORQUE_7DOF = np.array([150.0, 125.0, 40.0, 60.0, 5.0, 5.0, 2.0])
TORQUE_SPACE_WAM_7DOF = BoxSpace(-MAX_TORQUE_7DOF, MAX_TORQUE_7DOF)
# 4 DoF
MAX_TORQUE_4DOF = MAX_TORQUE_7DOF[:4]
TORQUE_SPACE_WAM_4DOF = BoxSpace(-MAX_TORQUE_4DOF, MAX_TORQUE_4DOF)

# Default PD gains from robcom / SL
WAM_PGAINS_7DOF = np.array([200.0, 300.0, 100.0, 100.0, 10.0, 10.0, 2.5])
WAM_DGAINS_7DOF = np.array([7.0, 15.0, 5.0, 2.5, 0.3, 0.3, 0.05])
WAM_PGAINS_4DOF = WAM_PGAINS_7DOF[:4]
WAM_DGAINS_4DOF = WAM_DGAINS_7DOF[:4]

# Desired initial joint angles
# 7 DoF
INIT_QPOS_DES_7DOF = np.array([0.0, 0.5876, 0.0, 1.36, 0.0, -0.321, -1.57])
# 4 DoF arm
INIT_QPOS_DES_4DOF = np.array([0.0, 0.6, 0.0, 1.25])
Example 20
 def state_space(self):
      max_state = np.full(8, 100.0)
      return BoxSpace(-max_state, max_state, labels=["x_1", "x_2", "x_3", "x_4", "x_5", "x_6", "x_7", "x_8"])
Example 21
    # Select the data set
    data_set_name = args.mode or "skyline"

    # Experiment
    ex_dir = setup_experiment(TSPred.name, ADNPolicy.name)

    # Set seed if desired
    pyrado.set_seed(args.seed, verbose=True)

    # Load the data
    if data_set_name == "skyline":
        dt = 0.01
        _, vals = skyline(dt=dt,
                          t_end=20.0,
                          t_intvl_space=BoxSpace(0.5, 3, shape=(1, )),
                          val_space=BoxSpace(-2.0, 3.0, shape=(1, )))
        data = to.from_numpy(vals).to(dtype=to.get_default_dtype()).view(-1, 1)
    else:
        data = pd.read_csv(
            osp.join(pyrado.PERMA_DIR, "misc", f"{data_set_name}.csv"))
        if data_set_name == "daily_min_temperatures":
            data = to.tensor(data["Temp"].values,
                             dtype=to.get_default_dtype()).view(-1, 1)
            dt = 1.0
        elif data_set_name == "monthly_sunspots":
            data = to.tensor(data["Sunspots"].values,
                             dtype=to.get_default_dtype()).view(-1, 1)
            dt = 1.0
        elif "oscillation" in data_set_name:
            data = to.tensor(data["Positions"].values,
Example 22
        5: ('ball_mass', 'std'),
        6: ('joint_stiction', 'mean'),
        7: ('joint_stiction', 'halfspan'),
        8: ('joint_damping', 'mean'),
        9: ('joint_damping', 'halfspan'),
    }
    env_sim = MetaDomainRandWrapper(env_sim, dp_map)

    # Set the boundaries for the GP (must be consistent with dp_map)
    dp_nom = WAMBallInCupSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([0.95*dp_nom['rope_length'], dp_nom['rope_length']/1000,
                           0.*dp_nom['rope_damping'], dp_nom['rope_damping']/100,
                           0.85*dp_nom['ball_mass'], dp_nom['ball_mass']/1000,
                           0.*dp_nom['joint_stiction'], dp_nom['joint_stiction']/100,
                           0.*dp_nom['joint_damping'], dp_nom['joint_damping']/100]),
        bound_up=np.array([1.05*dp_nom['rope_length'], dp_nom['rope_length']/20,
                           2*dp_nom['rope_damping'], dp_nom['rope_damping']/2,
                           1.15*dp_nom['ball_mass'], dp_nom['ball_mass']/10,
                           2*dp_nom['joint_stiction'], dp_nom['joint_stiction']/2,
                           2*dp_nom['joint_damping'], dp_nom['joint_damping']/2])
    )

    # Setting the ip address to None ensures that robcom does not try to connect to the server pc
    env_real_hparams = dict(
        num_dof=4,
        max_steps=1750,
    )
    env_real = WAMBallInCupRealEpisodic(**env_real_hparams)

    # Policy
    policy_hparam = dict(
Example 23
def test_snapshots_notmeta(ex_dir, env: SimEnv, policy, algo_class,
                           algo_hparam):
    # Collect hyper-parameters, create algorithm, and train
    common_hparam = dict(max_iter=1, num_workers=1)
    common_hparam.update(algo_hparam)

    if issubclass(algo_class, ActorCritic):
        common_hparam.update(
            min_rollouts=3,
            critic=GAE(
                vfcn=FNNPolicy(spec=EnvSpec(env.obs_space, ValueFunctionSpace),
                               hidden_sizes=[16, 16],
                               hidden_nonlin=to.tanh)),
        )
    elif issubclass(algo_class, ParameterExploring):
        common_hparam.update(num_init_states_per_domain=1)
    elif issubclass(algo_class, (DQL, SAC)):
        common_hparam.update(memory_size=1000,
                             num_updates_per_step=2,
                             gamma=0.99,
                             min_rollouts=1)
        fnn_hparam = dict(hidden_sizes=[8, 8], hidden_nonlin=to.tanh)
        if issubclass(algo_class, DQL):
            # Override the setting
            env = BallOnBeamDiscSim(env.dt, env.max_steps)
            net = FNN(
                input_size=DiscreteActQValPolicy.get_qfcn_input_size(env.spec),
                output_size=DiscreteActQValPolicy.get_qfcn_output_size(),
                **fnn_hparam,
            )
            policy = DiscreteActQValPolicy(spec=env.spec, net=net)
        else:
            # Override the setting
            env = ActNormWrapper(env)
            policy = TwoHeadedGRUPolicy(env.spec,
                                        shared_hidden_size=8,
                                        shared_num_recurrent_layers=1)
            obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
            common_hparam.update(qfcn_1=FNNPolicy(
                spec=EnvSpec(obsact_space, ValueFunctionSpace), **fnn_hparam))
            common_hparam.update(qfcn_2=FNNPolicy(
                spec=EnvSpec(obsact_space, ValueFunctionSpace), **fnn_hparam))
    else:
        raise NotImplementedError

    # Simulate training
    algo = algo_class(ex_dir, env, policy, **common_hparam)
    algo.policy.param_values += to.tensor([42.0])
    if isinstance(algo, ActorCritic):
        algo.critic.vfcn.param_values += to.tensor([42.0])

    # Save and load
    algo.save_snapshot(meta_info=None)
    algo_loaded = Algorithm.load_snapshot(load_dir=ex_dir)
    assert isinstance(algo_loaded, Algorithm)
    policy_loaded = algo_loaded.policy
    if isinstance(algo, ActorCritic):
        critic_loaded = algo_loaded.critic

    # Check
    assert all(algo.policy.param_values == policy_loaded.param_values)
    if isinstance(algo, ActorCritic):
        assert all(
            algo.critic.vfcn.param_values == critic_loaded.vfcn.param_values)

    # Load the experiment. Since we did not save any hyper-parameters, we ignore the errors when loading.
    env, policy, extra = load_experiment(ex_dir)
    assert isinstance(env, Env)
    assert isinstance(policy, Policy)
    assert isinstance(extra, dict)
Example 24
def default_randomizer():
    return DomainRandomizer(
        NormalDomainParam(name="mass", mean=1.2, std=0.1, clip_lo=10, clip_up=100),
        UniformDomainParam(name="special", mean=0, halfspan=42, clip_lo=-7.4, roundint=True),
        NormalDomainParam(name="length", mean=4, std=0.6, clip_up=50.1),
        UniformDomainParam(name="time_delay", mean=13, halfspan=6, clip_up=17, roundint=True),
        MultivariateNormalDomainParam(name="multidim", mean=10 * to.ones((2,)), cov=2 * to.eye(2), clip_up=11),
    )


# --------------
# Other Fixtures
# --------------


@pytest.fixture(
    scope="function",
    params=[
        (skyline(0.02, 20.0, BoxSpace(0.5, 3, shape=(1,)), BoxSpace(-2.0, 3.0, shape=(1,)))[1], 0.7, 50, False, True),
    ],
    ids=["skyline_0.8_50_notstdized_scaled"],
)
def dataset_ts(request):
    return TimeSeriesDataSet(
        data=to.from_numpy(request.param[0]).to(dtype=to.get_default_dtype()),
        ratio_train=request.param[1],
        window_size=request.param[2],
        standardize_data=request.param[3],
        scale_min_max_data=request.param[4],
    )
Example 25
def train_and_eval(trial: optuna.Trial, study_dir: str, seed: int):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param study_dir: the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(physicsEngine="Bullet", dt=1 / 100.0, max_steps=500)
    env = BallOnPlate2DSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=trial.suggest_categorical(
            "shared_hidden_sizes_policy", [(16, 16), (32, 32), (64, 64),
                                           (16, 16, 16), (32, 32, 32)]),
        shared_hidden_nonlin=fcn_from_str(
            trial.suggest_categorical("shared_hidden_nonlin_policy",
                                      ["to_tanh", "to_relu"])),
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    qfcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical("hidden_sizes_critic",
                                               [(16, 16), (32, 32), (64, 64),
                                                (16, 16, 16), (32, 32, 32)]),
        hidden_nonlin=fcn_from_str(
            trial.suggest_categorical("hidden_nonlin_critic",
                                      ["to_tanh", "to_relu"])),
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
    qfcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace),
                       **qfcn_hparam)
    qfcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace),
                       **qfcn_hparam)

    # Algorithm
    algo_hparam = dict(
        num_workers=1,  # parallelize via optuna n_jobs
        max_iter=100 * env.max_steps,
        min_steps=trial.suggest_categorical(
            "min_steps_algo", [1]),  # 10, env.max_steps, 10*env.max_steps
        memory_size=trial.suggest_loguniform("memory_size_algo",
                                             1e2 * env.max_steps,
                                             1e4 * env.max_steps),
        tau=trial.suggest_uniform("tau_algo", 0.99, 1.0),
        ent_coeff_init=trial.suggest_uniform("ent_coeff_init_algo", 0.1, 0.9),
        learn_ent_coeff=trial.suggest_categorical("learn_ent_coeff_algo",
                                                  [True, False]),
        standardize_rew=trial.suggest_categorical("standardize_rew_algo",
                                                  [False]),
        gamma=trial.suggest_uniform("gamma_algo", 0.99, 1.0),
        target_update_intvl=trial.suggest_categorical(
            "target_update_intvl_algo", [1, 5]),
        num_updates_per_step=trial.suggest_categorical(
            "num_batch_updates_algo", [1, 5]),
        batch_size=trial.suggest_categorical("batch_size_algo",
                                             [128, 256, 512]),
        lr=trial.suggest_loguniform("lr_algo", 1e-5, 1e-3),
    )
    csv_logger = create_csv_step_logger(
        osp.join(study_dir, f"trial_{trial.number}"))
    algo = SAC(study_dir,
               env,
               policy,
               qfcn_1,
               qfcn_2,
               **algo_hparam,
               logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode="latest", seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelRolloutSampler(
        env, policy, num_workers=1,
        min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts

    return mean_ret
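
Since Optuna passes only the `trial` argument, the remaining arguments are bound beforehand, e.g. via `functools.partial` as the docstring suggests. A minimal sketch (the study direction follows the docstring's "maximize"; the directory name and trial count are assumptions):

import functools
import optuna

study = optuna.create_study(direction="maximize")
study.optimize(functools.partial(train_and_eval, study_dir="study_dir", seed=0), n_trials=10)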
Example 26
    if env_name == QBallBalancerSim.name:
        env = QBallBalancerSim(dt=dt)

    elif env_name == QCartPoleSwingUpSim.name:
        env = QCartPoleSwingUpSim(dt=dt)

    elif env_name == QQubeSwingUpSim.name:
        env = QQubeSwingUpSim(dt=dt)

    elif env_name == "wam-bic":  # avoid loading mujoco
        from pyrado.environments.mujoco.wam_bic import WAMBallInCupSim

        env = WAMBallInCupSim(num_dof=4)
        env.init_space = BoxSpace(-pyrado.inf,
                                  pyrado.inf,
                                  shape=env.init_space.shape)

    elif env_name == "wam-jsc":  # avoid loading mujoco
        from pyrado.environments.mujoco.wam_jsc import WAMJointSpaceCtrlSim

        env = WAMJointSpaceCtrlSim(num_dof=7)
        env.init_space = BoxSpace(-pyrado.inf,
                                  pyrado.inf,
                                  shape=env.init_space.shape)

    elif env_name == "mg-ik":  # avoid loading _rcsenv
        from pyrado.environments.rcspysim.mini_golf import MiniGolfIKSim

        env = MiniGolfIKSim(dt=dt)
Example 27
def train_and_eval(trial: optuna.Trial, ex_dir: str, seed: Union[int, None]):
    """
    Objective function for the Optuna `Study` to maximize.

    .. note::
        Optuna expects only the `trial` argument, thus we use `functools.partial` to sneak in custom arguments.

    :param trial: Optuna Trial object for hyper-parameter optimization
    :param ex_dir: experiment's directory, i.e. the parent directory for all trials in this study
    :param seed: seed value for the random number generators, pass `None` for no seeding
    :return: objective function value
    """
    # Synchronize seeds between Optuna trials
    pyrado.set_seed(seed)

    # Environment
    env_hparams = dict(dt=1 / 100., max_steps=600)
    env = QQubeSim(**env_hparams)
    env = ActNormWrapper(env)

    # Policy
    policy_hparam = dict(
        shared_hidden_sizes=trial.suggest_categorical(
            'shared_hidden_sizes_policy',
            [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]),
        shared_hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('shared_hidden_nonlin_policy',
                                      ['to_tanh', 'to_relu'])),
    )
    policy = TwoHeadedFNNPolicy(spec=env.spec, **policy_hparam)

    # Critic
    q_fcn_hparam = dict(
        hidden_sizes=trial.suggest_categorical(
            'hidden_sizes_critic',
            [[16, 16], [32, 32], [64, 64], [16, 16, 16], [32, 32, 32]]),
        hidden_nonlin=fcn_from_str(
            trial.suggest_categorical('hidden_nonlin_critic',
                                      ['to_tanh', 'to_relu'])),
    )
    obsact_space = BoxSpace.cat([env.obs_space, env.act_space])
    q_fcn_1 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace),
                        **q_fcn_hparam)
    q_fcn_2 = FNNPolicy(spec=EnvSpec(obsact_space, ValueFunctionSpace),
                        **q_fcn_hparam)

    # Algorithm
    algo_hparam = dict(
        num_sampler_envs=1,  # parallelize via optuna n_jobs
        max_iter=100 * env.max_steps,
        min_steps=trial.suggest_categorical(
            'min_steps_algo', [1]),  # , 10, env.max_steps, 10*env.max_steps
        memory_size=trial.suggest_loguniform('memory_size_algo',
                                             1e2 * env.max_steps,
                                             1e4 * env.max_steps),
        tau=trial.suggest_uniform('tau_algo', 0.99, 1.),
        alpha_init=trial.suggest_uniform('alpha_init_algo', 0.1, 0.9),
        learn_alpha=trial.suggest_categorical('learn_alpha_algo',
                                              [True, False]),
        standardize_rew=trial.suggest_categorical('standardize_rew_algo',
                                                  [False]),
        gamma=trial.suggest_uniform('gamma_algo', 0.99, 1.),
        target_update_intvl=trial.suggest_categorical(
            'target_update_intvl_algo', [1, 5]),
        num_batch_updates=trial.suggest_categorical('num_batch_updates_algo',
                                                    [1, 5]),
        batch_size=trial.suggest_categorical('batch_size_algo',
                                             [128, 256, 512]),
        lr=trial.suggest_loguniform('lr_algo', 1e-5, 1e-3),
    )
    csv_logger = create_csv_step_logger(
        osp.join(ex_dir, f'trial_{trial.number}'))
    algo = SAC(ex_dir,
               env,
               policy,
               q_fcn_1,
               q_fcn_2,
               **algo_hparam,
               logger=csv_logger)

    # Train without saving the results
    algo.train(snapshot_mode='latest', seed=seed)

    # Evaluate
    min_rollouts = 1000
    sampler = ParallelSampler(
        env, policy, num_envs=1,
        min_rollouts=min_rollouts)  # parallelize via optuna n_jobs
    ros = sampler.sample()
    mean_ret = sum([r.undiscounted_return() for r in ros]) / min_rollouts

    return mean_ret
Example 28
import numpy as np
import torch as to
from matplotlib import pyplot as plt
from matplotlib import ticker

import pyrado
from pyrado.policies.features import FeatureStack, RBFFeat
from pyrado.policies.linear import LinearPolicy
from pyrado.spaces import BoxSpace
from pyrado.utils.data_types import EnvSpec


if __name__ == '__main__':
    # Define some arbitrary EnvSpec
    obs_space = BoxSpace(bound_lo=np.array([-5., -12.]), bound_up=np.array([10., 6.]))
    act_space = BoxSpace(bound_lo=np.array([-1.]), bound_up=np.array([1.]))
    spec = EnvSpec(obs_space, act_space)

    num_fpd = 5
    num_eval_points = 500

    policy_hparam = dict(
        feats=FeatureStack([RBFFeat(num_feat_per_dim=num_fpd, bounds=obs_space.bounds, scale=None)])
    )
    policy = LinearPolicy(spec, **policy_hparam)

    eval_grid_0 = to.linspace(-5., 10, num_eval_points)
    eval_grid_1 = to.linspace(-12., 6, num_eval_points)
    eval_grid = to.stack([eval_grid_0, eval_grid_1], dim=1)

    feat_vals = to.empty(num_eval_points, num_fpd*obs_space.flat_dim)
Example 29
import numpy as np

from pyrado.spaces import BoxSpace

# 4 DoF arm, 2 DoF actuated
init_qpos_des_4dof = np.array([0., 0.6, 0., 1.25])
act_min_wam_4dof = np.array([-1.985, -0.9, -10 * np.pi, -10 * np.pi])
act_max_wam_4dof = np.array([1.985, np.pi, 10 * np.pi, 10 * np.pi])
labels_4dof = ['q_1_des', 'q_3_des', 'q_dot_1_des', 'q_dot_3_des']
act_space_wam_4dof = BoxSpace(act_min_wam_4dof,
                              act_max_wam_4dof,
                              labels=labels_4dof)

# 7 DoF arm, 3 DoF actuated
init_qpos_des_7dof = np.array([0., 0.5876, 0., 1.36, 0., -0.321, -1.57])
act_min_wam_7dof = np.array(
    [-1.985, -0.9, -np.pi / 2, -10 * np.pi, -10 * np.pi, -10 * np.pi])
act_max_wam_7dof = np.array(
    [1.985, np.pi, np.pi / 2, 10 * np.pi, 10 * np.pi, 10 * np.pi])
labels_7dof = [
    'q_1_des', 'q_3_des', 'q_5_des', 'q_dot_1_des', 'q_dot_3_des',
    'q_dot_5_des'
]
act_space_wam_7dof = BoxSpace(act_min_wam_7dof,
                              act_max_wam_7dof,
                              labels=labels_7dof)
Example 30
    #     num_init_states_per_domain=6,
    #     num_is_samples=10,
    #     expl_std_init=2.0,
    #     expl_std_min=0.02,
    #     symm_sampling=False,
    #     num_workers=32,
    # )
    # subrtn = PoWER(ex_dir, env_sim, policy, **subrtn_hparam)

    # Set the boundaries for the GP
    dp_nom = QQubeSwingUpSim.get_nominal_domain_param()
    ddp_space = BoxSpace(
        bound_lo=np.array([
            0.8 * dp_nom["mass_pend_pole"], 1e-8,
            0.8 * dp_nom["mass_rot_pole"], 1e-8
        ]),
        bound_up=np.array([
            1.2 * dp_nom["mass_pend_pole"], 1e-7,
            1.2 * dp_nom["mass_rot_pole"], 1e-7
        ]),
    )

    # Algorithm
    bayrn_hparam = dict(
        max_iter=15,
        acq_fc="UCB",
        acq_param=dict(beta=0.25),
        acq_restarts=500,
        acq_samples=1000,
        num_init_cand=4,
        warmstart=False,
        num_eval_rollouts_real=100,