def test_mask_invert():
    """Check that with ``keep_selected=True`` only the masked observation entries are passed on."""
    obs_space = BoxSpace([-1, -2, -3], [1, 2, 3], labels=['one', 'two', 'three'])
    mockenv = MockEnv(obs_space=obs_space)

    # With keep_selected=True the mask marks the entries to keep, i.e. here only the second element survives
    wenv = ObsPartialWrapper(mockenv, [0, 1, 0], keep_selected=True)

    # Feed full observations through the mock and verify that only the selected entry comes out of the wrapper
    for full_obs, expected in (([1, 2, 3], [2]), ([4, 7, 9], [7])):
        mockenv.next_obs = full_obs
        obs, _, _, _ = wenv.step(None)
        assert list(obs) == expected
Exemple #2
0
def wrap_like_other_env(env_targ: Env, env_src: [SimEnv, EnvWrapper]) -> Env:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    The wrappers are applied in a fixed order: downsampling (if the source runs with a larger time step than the
    target), action normalization, observation normalization (static or running, the static one takes precedence),
    and finally partial observations. The normalization wrappers are created with their default arguments, i.e.
    settings of the source's wrappers are not copied. Only the mask of the partial observation wrapper is.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :return: target environment with the matching wrappers applied
    """
    import math  # only needed for the tolerance check on the time step ratio

    if env_src.dt > env_targ.dt:
        # Plain int() truncation turns e.g. 0.3 / 0.1 == 2.9999999999999996 into 2. Thus, ratios which are integer
        # up to floating point error are snapped to that integer. Truly fractional ratios are truncated as before.
        dt_ratio = env_src.dt / env_targ.dt
        nearest = int(round(dt_ratio))
        ds_factor = nearest if math.isclose(dt_ratio, nearest, rel_tol=1e-9) else int(dt_ratio)

        env_targ = DownsamplingWrapper(env_targ, ds_factor)
        print_cbt(
            f'Wrapped the env with an DownsamplingWrapper of factor {ds_factor}.',
            'c')

    if typed_env(env_src, ActNormWrapper) is not None:
        env_targ = ActNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ActNormWrapper.', 'c')

    # At most one observation normalization wrapper is applied, the static one is checked first
    if typed_env(env_src, ObsNormWrapper) is not None:
        env_targ = ObsNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsNormWrapper.', 'c')
    elif typed_env(env_src, ObsRunningNormWrapper) is not None:
        env_targ = ObsRunningNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsRunningNormWrapper.', 'c')

    # Look up the source's partial observation wrapper once, and reuse its mask for the target
    src_partial = typed_env(env_src, ObsPartialWrapper)
    if src_partial is not None:
        env_targ = ObsPartialWrapper(env_targ, mask=src_partial.keep_mask, keep_selected=True)
        print_cbt('Wrapped the env with an ObsPartialWrapper.', 'c')

    return env_targ
Exemple #3
0
def test_wrap_like_other_env(env: SimEnv):
    """
    Check that `wrap_like_other_env` reproduces the wrapper stack of a source environment on a target environment.

    The source environment is wrapped step by step. After each step, the target is passed through
    `wrap_like_other_env` and must end up with the same outermost wrapper type as the source (later on, the
    second outermost wrapper type is compared as well).

    :param env: simulation environment (NOTE(review): presumably provided by a pytest fixture or parametrization)
    """
    # The target is a copy of the environment whose time step `dt` is a third of the original one
    env_targ = deepcopy(env)
    env_targ.dt /= 3

    # Downsampling is only mirrored if explicitly requested
    env_src = DownsamplingWrapper(env, factor=3)
    assert type(env_targ) != type(env_src)
    env_targ = wrap_like_other_env(env_targ, env_src, use_downsampling=True)
    assert type(env_targ) == type(env_src)

    # Action normalization
    env_src = ActNormWrapper(env_src)
    assert type(env_targ) != type(env_src)
    env_targ = wrap_like_other_env(env_targ, env_src)
    assert type(env_targ) == type(env_src)

    # Observation normalization, from here on the wrapper one level below is compared, too
    env_src = ObsNormWrapper(env_src)
    assert type(env_targ) != type(env_src)
    env_targ = wrap_like_other_env(env_targ, env_src)
    assert type(env_targ) == type(env_src)
    assert type(env_targ.wrapped_env) == type(env_src.wrapped_env)

    # Running observation normalization on top of the static one
    env_src = ObsRunningNormWrapper(env_src)
    env_targ = wrap_like_other_env(env_targ, env_src)
    assert type(env_targ) == type(env_src)
    assert type(env_targ.wrapped_env) == type(env_src.wrapped_env)

    # Partial observations
    env_src = ObsPartialWrapper(env_src, idcs=["x"])
    env_targ = wrap_like_other_env(env_targ, env_src)
    assert type(env_targ) == type(env_src)
    assert type(env_targ.wrapped_env) == type(env_src.wrapped_env)
def test_spaces():
    """Check that the masked entry is removed from the bounds and the labels of the wrapper's observation space."""
    full_space = BoxSpace([-1, -2, -3], [1, 2, 3], labels=['one', 'two', 'three'])
    mockenv = MockEnv(obs_space=full_space)

    # Without keep_selected, the mask marks the entries to drop, i.e. here the second element
    wenv = ObsPartialWrapper(mockenv, [0, 1, 0])

    # Only the first and the third element must remain in the resulting space
    lower, upper = wenv.obs_space.bounds
    assert list(lower) == [-1, -3]
    assert list(upper) == [1, 3]
    assert list(wenv.obs_space.labels) == ['one', 'three']
def test_combination():
    """
    Stack several environment wrappers on a cart-pole swing-up simulation and check that they work together.

    Covered are the buffer-based domain randomization, the action and observation normalization, partial
    observations, Gaussian action noise, action delay, as well as the helpers to inspect and remove wrappers.
    """
    def run(some_env):
        # All rollouts use the same policy type, seed, and default render mode
        return rollout(some_env, DummyPolicy(some_env.spec), eval=True, seed=0, render_mode=RenderMode())

    env = QCartPoleSwingUpSim(dt=1/50., max_steps=20)

    # Domain randomization with a buffer holding 3 domains
    env_r = DomainRandWrapperBuffer(env, create_default_randomizer(env))
    env_r.fill_buffer(num_domains=3)

    params_pre, params_post = [], []
    for idx in range(4):
        params_pre.append(env_r.domain_param)
        run(env_r)
        params_post.append(env_r.domain_param)
        # Every rollout must have changed the domain parameters
        assert params_post[idx] != params_pre[idx]
    # With 3 buffered domains, the parameters after the 1st and after the 4th rollout must coincide
    assert params_post[0] == params_post[3]

    # Action and observation normalization, some observation bounds are set explicitly
    explicit_lower = {'x_dot': -213., 'theta_dot': -42.}
    explicit_upper = {'x_dot': 213., 'theta_dot': 42., 'x': 0.123}
    env_rn = ObsNormWrapper(ActNormWrapper(env), explicit_lb=explicit_lower, explicit_ub=explicit_upper)
    act_lo, act_hi = env_rn.act_space.bounds
    assert all(act_lo == -1)
    assert all(act_hi == 1)
    obs_lo, obs_hi = env_rn.obs_space.bounds
    assert all(obs_lo == -1)
    assert all(obs_hi == 1)

    # Normalizing the observations of the randomized env's rollout must reproduce the normalized env's rollout
    ro_r = run(env_r)
    ro_rn = run(env_rn)
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Partial observations on top of the normalization, this rollout is the reference for the checks below
    env_rnp = ObsPartialWrapper(env_rn, idcs=['x_dot', 'cos_theta'])
    ro_rnp = run(env_rnp)

    # Action noise: the recorded actions must be the same as in the reference, while the observations must differ
    act_ones = np.ones(env_rnp.act_space.shape)
    env_rnpa = GaussianActNoiseWrapper(env_rnp, noise_mean=0.5*act_ones, noise_std=0.1*act_ones)
    ro_rnpa = run(env_rnpa)
    assert np.allclose(ro_rnp.actions, ro_rnpa.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    # Action delay: again, same recorded actions but different observations
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = run(env_rnpd)
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Inspect the wrapper chain, and remove the outermost wrapper again
    assert isinstance(inner_env(env_rnpd), QCartPoleSwingUpSim)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemGoalDistance=True,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=True,
    )
    env = Planar3LinkTASim(**env_hparams)
    # env = Planar3LinkIKActivationSim(**env_hparams)
    # eub = {
    #     'GD_DS0': 2.,
    #     'GD_DS1': 2.,
    #     'GD_DS2': 2.,
    # }
    # env = ObsNormWrapper(env, explicit_ub=eub)
    env = ObsPartialWrapper(env, idcs=['Effector_DiscrepTS_X', 'Effector_DiscrepTS_Z'])
    # env = ObsPartialWrapper(env, idcs=['Effector_DiscrepTS_X', 'Effector_DiscrepTS_Z', 'Effector_Xd', 'Effector_Zd'])

    # Policy
    policy_hparam = dict(
        hidden_size=3,
        conv_out_channels=1,
        mirrored_conv_weights=True,
        conv_kernel_size=1,
        conv_padding_mode='circular',
        init_param_kwargs=dict(bell=True),
        activation_nonlin=to.sigmoid,
        tau_init=10.,
        tau_learnable=True,
        kappa_init=0,
        kappa_learnable=False,
Exemple #7
0
def wrap_like_other_env(
        env_targ: Union[SimEnv, RealEnv],
        env_src: [SimEnv, EnvWrapper],
        use_downsampling: bool = False) -> Union[SimEnv, RealEnv]:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    A wrapper is only added if the target environment does not already contain a wrapper of that type, hence
    calling this function repeatedly does not stack duplicates. The order is: downsampling, action normalization,
    observation normalization, running observation normalization, and partial observations. The normalization
    wrappers are created with their default arguments, i.e. settings of the source's wrappers are not copied.
    Only the mask of the partial observation wrapper is.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :param use_downsampling: apply a wrapper that downsamples the actions if the sampling frequencies don't match
    :return: target environment
    """
    import math  # only needed for the tolerance check on the time step ratio

    if use_downsampling and env_src.dt > env_targ.dt:
        if typed_env(env_targ, DownsamplingWrapper) is None:
            # Plain int() truncation turns e.g. 0.3 / 0.1 == 2.9999999999999996 into 2. Thus, ratios which are
            # integer up to floating point error are snapped to that integer. Truly fractional ratios are
            # truncated as before.
            dt_ratio = env_src.dt / env_targ.dt
            nearest = int(round(dt_ratio))
            ds_factor = nearest if math.isclose(dt_ratio, nearest, rel_tol=1e-9) else int(dt_ratio)

            env_targ = DownsamplingWrapper(env_targ, ds_factor)
            print_cbt(
                f"Wrapped the target environment with a DownsamplingWrapper of factor {ds_factor}.",
                "y")
        else:
            print_cbt(
                "The target environment was already wrapped with a DownsamplingWrapper.",
                "y")

    # These wrappers are all constructed from the environment only, thus they share the same logic
    for wrapper_cls in (ActNormWrapper, ObsNormWrapper, ObsRunningNormWrapper):
        if typed_env(env_src, wrapper_cls) is None:
            continue  # nothing to mirror
        if typed_env(env_targ, wrapper_cls) is None:
            env_targ = wrapper_cls(env_targ)
            print_cbt(
                f"Wrapped the target environment with an {wrapper_cls.__name__}.",
                "y")
        else:
            print_cbt(
                f"The target environment was already wrapped with an {wrapper_cls.__name__}.",
                "y")

    # The partial observation wrapper additionally needs the mask of the source's wrapper
    src_partial = typed_env(env_src, ObsPartialWrapper)
    if src_partial is not None:
        if typed_env(env_targ, ObsPartialWrapper) is None:
            env_targ = ObsPartialWrapper(env_targ, mask=src_partial.keep_mask, keep_selected=True)
            print_cbt(
                "Wrapped the target environment with an ObsPartialWrapper.",
                "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ObsPartialWrapper.",
                "y")

    return env_targ
Exemple #8
0
        checkJointLimits=True,
        taskCombinationMethod="sum",
        collisionAvoidanceIK=False,
        observeVelocity=False,
        observeCollisionCost=False,
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=False,
        observeForceTorque=True,
        observeDynamicalSystemGoalDistance=False,
    )
    env = BoxLiftingVelIKActivationSim(**env_hparams)

    env = ObsPartialWrapper(env, idcs=["Box_Y", "Box_Z", "Box_A"])

    # Domain randomizer
    dp_nom = env.get_nominal_domain_param()
    randomizer = create_default_randomizer(env)
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    policy_hparam = dict(
        tau_init=50.0,
        tau_learnable=True,
        kappa_init=1e-3,
        kappa_learnable=True,
        activation_nonlin=to.tanh,
        potentials_dyn_fcn=pd_cubic,
        potential_init_learnable=False,
Exemple #9
0
        observePredictedCollisionCost=False,
        observeManipulabilityIndex=False,
        observeCurrentManipulability=True,
        observeDynamicalSystemGoalDistance=True,
        observeDynamicalSystemDiscrepancy=False,
        observeTaskSpaceDiscrepancy=True,
    )
    env = Planar3LinkTASim(**env_hparams)
    # env = Planar3LinkIKActivationSim(**env_hparams)
    # eub = {
    #     'GD_DS0': 2.,
    #     'GD_DS1': 2.,
    #     'GD_DS2': 2.,
    # }
    # env = ObsNormWrapper(env, explicit_ub=eub)
    env = ObsPartialWrapper(env, idcs=["Effector_DiscrepTS_X", "Effector_DiscrepTS_Z"])
    # env = ObsPartialWrapper(env, idcs=['Effector_DiscrepTS_X', 'Effector_DiscrepTS_Z', 'Effector_Xd', 'Effector_Zd'])

    # Policy
    policy_hparam = dict(
        hidden_size=3,
        conv_out_channels=1,
        mirrored_conv_weights=True,
        conv_kernel_size=1,
        conv_padding_mode="circular",
        init_param_kwargs=dict(bell=True),
        activation_nonlin=to.sigmoid,
        tau_init=10.0,
        tau_learnable=True,
        kappa_init=1e-3,
        kappa_learnable=True,
Exemple #10
0
def test_combination(env: SimEnv):
    """
    Stack several environment wrappers on a simulation environment and check that they work together.

    Covered are the buffer-based domain randomization, the action and observation normalization, partial
    observations, Gaussian action noise, action delay, as well as the helpers to inspect and remove wrappers.

    :param env: simulation environment (NOTE(review): presumably provided by a pytest fixture or parametrization;
                the explicit bounds below require observations labeled `x`, `x_dot`, and `theta_dot` - confirm)
    """
    def run(some_env):
        # All rollouts use the same policy type, seed, and default render mode
        return rollout(some_env, DummyPolicy(some_env.spec), eval=True, seed=0, render_mode=RenderMode())

    pyrado.set_seed(0)
    env.max_steps = 20

    # Domain randomization with a buffer holding 3 domains
    env_r = DomainRandWrapperBuffer(env, create_default_randomizer(env))
    env_r.fill_buffer(num_domains=3)

    params_pre, params_post = [], []
    for idx in range(4):
        params_pre.append(env_r.domain_param)
        run(env_r)
        params_post.append(env_r.domain_param)
        # Every rollout must have changed the domain parameters
        assert params_post[idx] != params_pre[idx]
    # With 3 buffered domains, the parameters after the 1st and after the 4th rollout must coincide
    assert params_post[0] == params_post[3]

    # Action and observation normalization, some observation bounds are set explicitly
    explicit_lower = {"x_dot": -213.0, "theta_dot": -42.0}
    explicit_upper = {"x_dot": 213.0, "theta_dot": 42.0, "x": 0.123}
    env_rn = ObsNormWrapper(ActNormWrapper(env), explicit_lb=explicit_lower, explicit_ub=explicit_upper)
    act_lo, act_hi = env_rn.act_space.bounds
    assert all(act_lo == -1)
    assert all(act_hi == 1)
    obs_lo, obs_hi = env_rn.obs_space.bounds
    assert all(obs_lo == -1)
    assert all(obs_hi == 1)

    # Normalizing the observations of the randomized env's rollout must reproduce the normalized env's rollout
    ro_r = run(env_r)
    ro_rn = run(env_rn)
    assert np.allclose(env_rn._process_obs(ro_r.observations), ro_rn.observations)

    # Partial observations selected via the 3rd and 4th label of the unwrapped env's observation space,
    # this rollout is the reference for the checks below
    obs_labels = env.obs_space.labels
    env_rnp = ObsPartialWrapper(env_rn, idcs=[obs_labels[2], obs_labels[3]])
    ro_rnp = run(env_rnp)

    # Action noise: the observations must differ from the reference since the noise changed the rollout
    act_ones = np.ones(env_rnp.act_space.shape)
    env_rnpa = GaussianActNoiseWrapper(env_rnp, noise_mean=0.5 * act_ones, noise_std=0.1 * act_ones)
    ro_rnpa = run(env_rnpa)
    assert not np.allclose(ro_rnp.observations, ro_rnpa.observations)

    # Action delay: the recorded actions must be the same as in the reference, while the observations must differ
    env_rnpd = ActDelayWrapper(env_rnp, delay=3)
    ro_rnpd = run(env_rnpd)
    assert np.allclose(ro_rnp.actions, ro_rnpd.actions)
    assert not np.allclose(ro_rnp.observations, ro_rnpd.observations)

    # Inspect the wrapper chain, and remove the outermost wrapper again
    assert type(inner_env(env_rnpd)) == type(env)
    assert typed_env(env_rnpd, ObsPartialWrapper) is not None
    assert isinstance(env_rnpd, ActDelayWrapper)
    env_rnpdr = remove_env(env_rnpd, ActDelayWrapper)
    assert not isinstance(env_rnpdr, ActDelayWrapper)