コード例 #1
0
def create_bob_setup():
    """
    Assemble the ball-on-beam environments and policies.

    Two `BallOnBeamSim` instances are created from the same hyper-parameters: one acts as the "real" system with
    its gravity domain parameter overridden, the other one is wrapped with a live domain randomizer and a
    meta-level randomization wrapper. In addition, a linear behavioral policy with hard-coded parameters and a
    policy over the domain distribution parameters are created.

    :return: tuple `(env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy)`
    """
    # Hyper-parameters shared by both environments
    env_hparams = {'dt': 1 / 100., 'max_steps': 500}

    # "Real" environment: same simulation, but with the gravity domain parameter set to 10.81
    env_real = BallOnBeamSim(**env_hparams)
    env_real.domain_param = {
        # 'l_beam': 1.95,
        # 'ang_offset': -0.03,
        'g': 10.81,
    }

    # Simulated environment, randomized live and then wrapped for meta-level randomization
    sim_base = BallOnBeamSim(**env_hparams)
    # NOTE(review): mean=0 and std=1e-12 look like placeholders that get overwritten through the meta wrapper - confirm
    sim_randomizer = DomainRandomizer(
        # NormalDomainParam(name='l_beam', mean=0, std=1e-12, clip_lo=1.5, clip_up=3.5),
        # UniformDomainParam(name='ang_offset', mean=0, halfspan=1e-12),
        NormalDomainParam(name='g', mean=0, std=1e-12),
    )
    # Index -> (domain parameter name, distribution parameter name)
    dp_map = dict(enumerate([
        # ('l_beam', 'mean'), ('l_beam', 'std'),
        # ('ang_offset', 'mean'), ('ang_offset', 'halfspan'),
        ('g', 'mean'),
        ('g', 'std'),
    ]))
    env_sim = MetaDomainRandWrapper(DomainRandWrapperLive(sim_base, sim_randomizer), dp_map)

    # Policies (the behavioral policy needs to be deterministic)
    behavior_policy = LinearPolicy(env_sim.spec, feats=FeatureStack([identity_feat, sin_feat]))
    fixed_params = [3.8090, -3.8036, -1.0786, -2.4510, -0.9875, -1.3252, 3.1503, 1.4443]
    behavior_policy.param_values = to.tensor(fixed_params)

    # Policy over the domain distribution parameters, with a prior on gravity
    # (the mask has one entry per entry of dp_map; for the commented-out 4-entry mapping it would be
    # [False, True, False, True])
    ddp_policy = DomainDistrParamPolicy(
        mapping=dp_map,
        trafo_mask=[True, True],
        prior=DomainRandomizer(
            # NormalDomainParam(name='l_beam', mean=2.05, std=2.05/10),
            # UniformDomainParam(name='ang_offset', mean=0.03, halfspan=0.03/10),
            NormalDomainParam(name='g', mean=8.81, std=8.81 / 10),
        ),
        scale_params=True,
    )

    return env_sim, env_real, env_hparams, dp_map, behavior_policy, ddp_policy
コード例 #2
0
 def __init__(self):
     """
     Run one rollout, print its result, and set up the ball and beam models in the scene.

     The names `env`, `policy`, `args`, and `max_steps` are not defined in this method; they must be provided by
     an enclosing (e.g. module-level) scope. `self.loader`, `self.render`, and `self.camera` are not assigned
     here either; presumably they are set up by `ShowBase.__init__` (confirm).
     """
     # Explicitly run the ShowBase initializer on this instance before anything else
     ShowBase.__init__(self)

     # Rollout bookkeeping: `state` and `param` are forwarded as reset arguments below and are overwritten
     # by the result of `after_rollout_query`
     self.done = False
     self.state = None
     self.param = None
     print("a")  # NOTE(review): looks like a debug progress marker
     self.ro = rollout(
         env,
         policy,
         render_mode=RenderMode(text=args.verbose,
                                video=args.animation),
         eval=True,
         max_steps=max_steps,
         stop_on_done=not args.relentless,
         reset_kwargs=dict(domain_param=self.param,
                           init_state=self.state),
     )
     print("hoi")  # NOTE(review): looks like a debug progress marker

     # Report the domain parameters and the undiscounted return of the rollout
     print_domain_params(env.domain_param)
     print_cbt(f"Return: {self.ro.undiscounted_return()}",
               "g",
               bright=True)
     self.done, self.state, self.param = after_rollout_query(
         env, policy, self.ro)
     print("1")  # NOTE(review): looks like a debug progress marker

     # Separate environment instance whose private `_init_anim` supplies the values used for the scene below
     # NOTE(review): confirm which constructor parameter the positional argument 2 binds to
     self.bob = BallOnBeamSim(2)
     print("2")  # NOTE(review): looks like a debug progress marker
     self.pos, self.r_ball, self.a, self.l_beam, self.d_beam = self.bob._init_anim(
     )
     print("3")  # NOTE(review): looks like a debug progress marker

     # Ball model, attached to the scene and placed at the position returned by `_init_anim`
     self.ball = self.loader.loadModel("my_models/ball")
     self.ball.reparentTo(self.render)
     self.ball.setPos(self.pos)

     # Box model placed at the origin and scaled with `l_beam`, `d_beam`, and twice `d_beam`
     self.box = self.loader.loadModel("my_models/box")
     self.box.reparentTo(self.render)
     self.box.setPos(0, 0, 0)
     self.box.setScale(self.l_beam, self.d_beam, 2 * self.d_beam)

     # Camera position
     self.camera.setPos(0, -10, 0)
コード例 #3
0
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `BallOnBeamSim`.

    Gravity, the ball's mass and radius, and the beam's mass, length, and thickness parameter are normally
    distributed around their nominal values and clipped from below. The friction coefficient is uniformly
    distributed around its nominal value with a half span equal to that value, clipped at zero. The angular
    offset is uniformly distributed around zero.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    # (parameter name, divisor applied to the nominal value to obtain the std, lower clipping bound)
    normal_specs = [
        ('g', 10, 1e-4),
        ('m_ball', 5, 1e-4),
        ('r_ball', 5, 1e-4),
        ('m_beam', 5, 1e-3),
        ('l_beam', 5, 1e-3),
        ('d_beam', 5, 1e-3),
    ]
    normal_params = [
        NormalDomainParam(name=key, mean=nominal[key], std=nominal[key]/divisor, clip_lo=lower)
        for key, divisor, lower in normal_specs
    ]

    friction = UniformDomainParam(
        name='c_frict', mean=nominal['c_frict'], halfspan=nominal['c_frict'], clip_lo=0
    )
    # Mean and half span are written as degree values scaled by pi/180
    ang_offset = UniformDomainParam(name='ang_offset', mean=0./180*np.pi, halfspan=0.1/180*np.pi)

    return DomainRandomizer(*normal_params, friction, ang_offset)
コード例 #4
0
def create_default_randomizer_bob() -> DomainRandomizer:
    """
    Build the default domain randomizer for the `BallOnBeamSim`.

    All normally distributed parameters are centered on their nominal value, use a fraction of that value as
    standard deviation, and are clipped from below. The friction coefficient is uniformly distributed around its
    nominal value with a half span equal to that value, clipped at zero. The angular offset is uniformly
    distributed around zero.

    :return: randomizer based on the nominal domain parameter values
    """
    from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim

    nominal = BallOnBeamSim.get_nominal_domain_param()

    def _normal(key, std_divisor, lower_bound):
        # Normal distribution around the nominal value with std = nominal / std_divisor, clipped from below
        return NormalDomainParam(
            name=key, mean=nominal[key], std=nominal[key] / std_divisor, clip_lo=lower_bound
        )

    return DomainRandomizer(
        _normal("gravity_const", 10, 1e-4),
        _normal("ball_mass", 5, 1e-4),
        _normal("ball_radius", 5, 1e-4),
        _normal("beam_mass", 5, 1e-3),
        _normal("beam_length", 5, 1e-3),
        _normal("beam_thickness", 5, 1e-3),
        UniformDomainParam(
            name="friction_coeff",
            mean=nominal["friction_coeff"],
            halfspan=nominal["friction_coeff"],
            clip_lo=0,
        ),
        # Mean and half span are written as degree values scaled by pi/180
        UniformDomainParam(name="ang_offset", mean=0.0 / 180 * np.pi, halfspan=0.1 / 180 * np.pi),
    )
コード例 #5
0
                          'ident-sin',
                          seed=1001)
"""
Set up the environment a.k.a. domain to train in. After creating the environment, you can apply various wrappers which
are modular. Note that the order of wrappers might be of importance. For example, wrapping an environment with an
`ObsNormWrapper` and then with a `GaussianObsNoiseWrapper` applies the noise on the normalized observations, and yields
different results than the reverse order of wrapping.
Environments in Pyrado can be of different types: (i) written in Python only (like the Quanser simulations or simple
OpenAI Gym environments), (ii) wrapped as well as self-designed MuJoCo-based simulations, or (iii) self-designed
robotic environments powered by Rcs using either the Bullet or Vortex physics engine. None of the simulations includes
any computer vision aspects. It is all about dynamics-based interaction and (continuous) control. The degree of
randomization for the environments varies strongly, since it is a lot of work to randomize them properly (including
testing) and I have to graduate after all ;)
"""
env_hparams = dict(dt=1 / 50., max_steps=300)
env = BallOnBeamSim(**env_hparams)
env = ActNormWrapper(env)
"""
Set up the policy after the environment since it needs to know the dimensions of the policy's observation and action
space. There are many different policy architectures available under `Pyrado/pyrado/policies`, which significantly
vary in terms of required hyper-parameters. You can find some examples at `Pyrado/scripts/training`.
Note that all policies must inherit from `Policy` which inherits from `torch.nn.Module`. Moreover, all `Policy`
instances are deterministic. The exploration is handled separately (see `Pyrado/pyrado/exploration`).
"""
policy_hparam = dict(feats=FeatureStack([identity_feat, sin_feat]))
policy = LinearPolicy(spec=env.spec, **policy_hparam)
"""
Specify the algorithm you want to use for learning the policy parameters.
For deterministic sampling, you need to set `num_sampler_envs=1`. If `num_sampler_envs>1`, PyTorch's multiprocessing
library will be used to parallelize sampling from the environment on the CPU. The resulting behavior is non-deterministic,
i.e. even for the same random seed, you will get different results.
コード例 #6
0
import pytest

from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim
from pyrado.environments.pysim.quanser_ball_balancer import QBallBalancerSim
from pyrado.environments.sim_base import SimEnv
from pyrado.exploration.stochastic_action import NormalActNoiseExplStrat
from pyrado.exploration.stochastic_params import NormalParamNoise
from pyrado.policies.base import Policy
from pyrado.policies.features import *


@pytest.mark.parametrize(
    "env",
    [
        BallOnBeamSim(dt=0.02, max_steps=1),
        QBallBalancerSim(dt=0.02, max_steps=1),
    ],
    ids=["bob", "qbb"],
)
@pytest.mark.parametrize("policy", ["linear_policy", "fnn_policy"],
                         ids=["lin", "fnn"],
                         indirect=True)
def test_noise_on_act(env: SimEnv, policy: Policy):
    for _ in range(100):
        # Init the exploration strategy
        act_noise_strat = NormalActNoiseExplStrat(policy,
                                                  std_init=0.5,
                                                  train_mean=True)

        # Set new parameters for the exploration noise
コード例 #7
0
ファイル: test_utils.py プロジェクト: arlene-kuehn/SimuRLacra
    rowvar = not data_along_rows
    cov_np = np.cov(x, rowvar=rowvar)
    cov_pyrado = cov(to.from_numpy(x), data_along_rows=data_along_rows).numpy()

    assert cov_pyrado.shape[0] == cov_pyrado.shape[1]
    if data_along_rows:
        assert cov_np.shape[0] == x.shape[1]
        assert cov_pyrado.shape[0] == x.shape[1]
    else:
        assert cov_np.shape[0] == x.shape[0]
        assert cov_pyrado.shape[0] == x.shape[0]
    assert np.allclose(cov_np, cov_pyrado)


@pytest.mark.parametrize(
    'env, expl_strat',
    [
        (
            BallOnBeamSim(dt=0.02, max_steps=100),
            DummyPolicy(BallOnBeamSim(dt=0.02, max_steps=100).spec),
        ),
    ],
    ids=['bob_dummy'],
)
def test_concat_rollouts(env, expl_strat):
    """Two rollouts concatenated must form a single `StepSequence` whose length is the sum of both lengths."""
    # Sample two rollouts with the same environment and policy
    rollouts = [rollout(env, expl_strat) for _ in range(2)]
    merged = StepSequence.concat(rollouts)

    assert isinstance(merged, StepSequence)
    assert merged.length == sum(ro.length for ro in rollouts)


@pytest.mark.parametrize('x, y', [
    (to.tensor([1., 2., 3.]), to.tensor([1., 2., 3.])),
    (to.tensor([1., 0., 1.]), to.tensor([1., 1e12, 1.])),
    (to.tensor([0., 0., 0.]), to.tensor([1., 2, 3.])),
コード例 #8
0
def default_omo():
    """
    Create the default environment provided under the name "omo".

    NOTE(review): the name suggests a one-mass-oscillator, yet a `BallOnBeamSim` is constructed here - confirm
    that this is intended.

    :return: `BallOnBeamSim` constructed with `dt=0.02` and `max_steps=300`
    """
    env_kwargs = dict(dt=0.02, max_steps=300)
    return BallOnBeamSim(**env_kwargs)
コード例 #9
0
def default_bob():
    """
    Create the default ball-on-beam environment.

    :return: `BallOnBeamSim` constructed with `dt=0.01` and `max_steps=500`
    """
    env_kwargs = dict(dt=0.01, max_steps=500)
    return BallOnBeamSim(**env_kwargs)
コード例 #10
0
ファイル: sb_pysim_render.py プロジェクト: fdamken/SimuRLacra
    elif args.env_name == QBallBalancerSim.name:
        env = QBallBalancerSim(dt=dt, max_steps=int(5 / dt))
        state = np.array(
            [2 / 180 * np.pi, 2 / 180 * np.pi, 0.1, -0.08, 0, 0, 0, 0])

    elif args.env_name == OneMassOscillatorSim.name:
        env = OneMassOscillatorSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([-0.7, 0])

    elif args.env_name == PendulumSim.name:
        env = PendulumSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([87 / 180 * np.pi, 0])

    elif args.env_name == BallOnBeamSim.name:
        env = BallOnBeamSim(dt=dt, max_steps=int(5 / dt))
        state = np.array([-0.25, 0, 0, +20 / 180 * np.pi])

    else:
        raise pyrado.ValueErr(
            given=args.env_name,
            eq_constraint=
            f"{QCartPoleSwingUpSim.name}, {QQubeSwingUpSim.name}, {QBallBalancerSim.name}, "
            f"{OneMassOscillatorSim.name}, {PendulumSim.name}, or {BallOnBeamSim.name}",
        )

    policy = IdlePolicy(env.spec)

    # Simulate
    done, param = False, None
    while not done:
コード例 #11
0
                      policy_infos=policy_infos,
                      hidden=hidden,
                      data_format=data_format)

    # Pickle/unpickle
    ro2 = pickle.loads(pickle.dumps(ro, pickle.HIGHEST_PROTOCOL))

    for step, step_pi in zip(ro, ro2):
        assert step.reward == step_pi.reward
        assert (step.observation == step_pi.observation).all()
        assert (step.action == step_pi.action).all()
        assert step.done == step_pi.done


@pytest.mark.parametrize('env', [
    BallOnBeamSim(dt=0.01, max_steps=200),
],
                         ids=['bob_linpol'])
def test_advantage_calculation(env, linear_policy):
    ro = rollout(env, linear_policy)
    gamma = 0.99
    lamb = 0.95

    # Add dummy values
    values = np.ones_like(ro.rewards)
    if not ro.done[-1]:
        values = to.cat([values, 0])
    ro.add_data('values', values)

    gae1 = gae_returns(ro, gamma, lamb)
コード例 #12
0
ファイル: sb_sampler.py プロジェクト: fdamken/SimuRLacra
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Script to sample some rollouts using the ParallelRolloutSampler
"""
from tabulate import tabulate

from pyrado.environment_wrappers.action_normalization import ActNormWrapper
from pyrado.environments.pysim.ball_on_beam import BallOnBeamSim
from pyrado.policies.features import FeatureStack, identity_feat, squared_feat
from pyrado.policies.feed_back.linear import LinearPolicy
from pyrado.sampling.parallel_rollout_sampler import ParallelRolloutSampler

if __name__ == "__main__":
    # Set up environment
    env = BallOnBeamSim(dt=0.02, max_steps=500)
    env = ActNormWrapper(env)

    # Set up policy
    feats = FeatureStack(identity_feat, squared_feat)
    policy = LinearPolicy(env.spec, feats)

    # Set up sampler
    sampler = ParallelRolloutSampler(env,
                                     policy,
                                     num_workers=2,
                                     min_rollouts=2000)

    # Sample and print
    ros = sampler.sample()
    print(
コード例 #13
0
    cov_np = np.cov(x, rowvar=rowvar)
    cov_pyrado = cov(to.from_numpy(x), data_along_rows=data_along_rows).numpy()

    assert cov_pyrado.shape[0] == cov_pyrado.shape[1]
    if data_along_rows:
        assert cov_np.shape[0] == x.shape[1]
        assert cov_pyrado.shape[0] == x.shape[1]
    else:
        assert cov_np.shape[0] == x.shape[0]
        assert cov_pyrado.shape[0] == x.shape[0]
    assert np.allclose(cov_np, cov_pyrado)


@pytest.mark.parametrize(
    'env, expl_strat',
    [
        (
            BallOnBeamSim(dt=0.02, max_steps=100),
            DummyPolicy(BallOnBeamSim(dt=0.02, max_steps=100).spec),
        ),
    ],
    ids=['bob_dummy'],
)
def test_concat_rollouts(env, expl_strat):
    """Check that concatenating two rollouts gives a `StepSequence` as long as both rollouts together."""
    first = rollout(env, expl_strat)
    second = rollout(env, expl_strat)
    expected_length = first.length + second.length

    combined = StepSequence.concat([first, second])

    assert isinstance(combined, StepSequence)
    assert combined.length == expected_length


@pytest.mark.parametrize(
    'x, y', [
        (to.tensor([1., 2., 3.]), to.tensor([1., 2., 3.])),
        (to.tensor([1., 0., 1.]), to.tensor([1., 1e12, 1.])),
コード例 #14
0
        env,
        noise_mean=0.1 * np.ones(env.act_space.shape),
        noise_std=0.2 * np.ones(env.act_space.shape))
    for _ in range(3):
        # Sample some values
        rand_act = env.act_space.sample_uniform()
        wrapped_env.reset()
        obs_nom, _, _, _ = env.step(rand_act)
        obs_wrapped, _, _, _ = wrapped_env.step(rand_act)
        # Different actions can not lead to the same observation
        assert not np.all(obs_nom == obs_wrapped)


@pytest.mark.wrappers
@pytest.mark.parametrize('env', [
    BallOnBeamSim(dt=0.05, max_steps=1),
],
                         ids=['bob'])
def test_order_act_noise_act_norm(env):
    # First noise wrapper then normalization wrapper
    wrapped_env_noise = GaussianActNoiseWrapper(
        env,
        noise_mean=0.2 * np.ones(env.act_space.shape),
        noise_std=0.1 * np.ones(env.act_space.shape))
    wrapped_env_noise_norm = ActNormWrapper(wrapped_env_noise)

    # First normalization wrapper then noise wrapper
    wrapped_env_norm = ActNormWrapper(env)
    wrapped_env_norm_noise = GaussianActNoiseWrapper(
        wrapped_env_norm,
        noise_mean=0.2 * np.ones(env.act_space.shape),
コード例 #15
0
    'num_dim, method', [
        (1, 'uniform'), (1, 'uniform'),
        (3, 'uniform'), (3, 'normal'), (3, 'Marsaglia'),
        (4, 'uniform'), (4, 'normal'), (4, 'Marsaglia'),
        (15, 'uniform'), (15, 'normal')
    ]
)
def test_sample_from_unit_sphere_surface(num_dim, method):
    s = sample_from_hyper_sphere_surface(num_dim, method)
    assert 0.95 <= to.norm(s, p=2) <= 1.05


@pytest.mark.sampling
@pytest.mark.parametrize(
    'env, policy',
    [
        (
            env_cls(dt=0.02, max_steps=100),
            LinearPolicy(
                env_cls(dt=0.02, max_steps=100).spec,
                FeatureStack([const_feat, identity_feat, squared_feat]),
            ),
        )
        for env_cls in (BallOnBeamSim, QBallBalancerSim)
    ],
    ids=['bob_linpol', 'qbb_linpol'],
)
def test_rollout_wo_exploration(env, policy):
    """A rollout with the plain policy must return a `StepSequence` no longer than the environment's step limit."""
    trajectory = rollout(env, policy, render_mode=RenderMode())

    assert isinstance(trajectory, StepSequence)
    assert env.max_steps >= len(trajectory)


@pytest.mark.parametrize(
    'mean, cov', [
コード例 #16
0
ファイル: test_utils.py プロジェクト: fdamken/SimuRLacra
    cov_pyrado = cov(to.from_numpy(x), data_along_rows=data_along_rows).numpy()

    assert cov_pyrado.shape[0] == cov_pyrado.shape[1]
    if data_along_rows:
        assert cov_np.shape[0] == x.shape[1]
        assert cov_pyrado.shape[0] == x.shape[1]
    else:
        assert cov_np.shape[0] == x.shape[0]
        assert cov_pyrado.shape[0] == x.shape[0]
    assert np.allclose(cov_np, cov_pyrado)


@pytest.mark.parametrize(
    "env, expl_strat",
    [
        (
            BallOnBeamSim(dt=0.02, max_steps=100),
            DummyPolicy(BallOnBeamSim(dt=0.02, max_steps=100).spec),
        ),
    ],
    ids=["bob_dummy"],
)
def test_concat_rollouts(env, expl_strat):
    """Concatenate two rollouts and verify the type and the total length of the result."""
    parts = [rollout(env, expl_strat) for _ in range(2)]
    joined = StepSequence.concat(parts)

    assert isinstance(joined, StepSequence)
    assert joined.length == parts[0].length + parts[1].length


@pytest.mark.parametrize(
    "x, y",
    [
        (to.tensor([1.0, 2.0, 3.0]), to.tensor([1.0, 2.0, 3.0])),