Ejemplo n.º 1
0
def test_denormalization(mock_obs_space):
    """
    Check that the observations returned by an `ObsNormWrapper` stay within [-1, 1].

    :param mock_obs_space: observation space handed to the mock environment
    """
    mockenv = MockEnv(obs_space=mock_obs_space)
    wenv = ObsNormWrapper(mockenv)

    for _ in range(100):
        # Step the wrapped environment with a zero action and inspect the observation it returns
        obs, _, _, _ = wenv.step(np.array([0, 0, 0]))
        # `.all` has to be called: the bare bound method is always truthy, so the assertion could never fail
        assert (abs(obs) <= 1).all()
Ejemplo n.º 2
0
def wrap_like_other_env(env_targ: Env, env_src: [SimEnv, EnvWrapper]) -> Env:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    The wrappers found in the source environment's chain are re-applied to the target in a fixed order:
    downsampling (only if the source time step is larger), action normalization, observation normalization
    (static, otherwise running), and partial observations.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :return: target environment
    """
    if env_src.dt > env_targ.dt:
        # The quotient of two float time steps is rarely an exact integer (0.3 / 0.1 == 2.9999999999999996), and a
        # plain `int()` would then yield a factor that is one too small. Snap to the nearest integer if the quotient
        # is within round-off of it, and keep the truncating behavior for genuinely fractional ratios.
        dt_ratio = env_src.dt / env_targ.dt
        nearest = int(round(dt_ratio))
        ds_factor = nearest if abs(dt_ratio - nearest) < 1e-6 else int(dt_ratio)
        env_targ = DownsamplingWrapper(env_targ, ds_factor)
        print_cbt(
            f'Wrapped the env with an DownsamplingWrapper of factor {ds_factor}.',
            'c')

    if typed_env(env_src, ActNormWrapper) is not None:
        env_targ = ActNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ActNormWrapper.', 'c')

    # The static and the running observation normalization are treated as mutually exclusive here
    if typed_env(env_src, ObsNormWrapper) is not None:
        env_targ = ObsNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsNormWrapper.', 'c')
    elif typed_env(env_src, ObsRunningNormWrapper) is not None:
        env_targ = ObsRunningNormWrapper(env_targ)
        print_cbt('Wrapped the env with an ObsRunningNormWrapper.', 'c')

    # Look the partial-observation wrapper up once and reuse its mask for the target
    src_partial = typed_env(env_src, ObsPartialWrapper)
    if src_partial is not None:
        env_targ = ObsPartialWrapper(env_targ,
                                     mask=src_partial.keep_mask,
                                     keep_selected=True)
        print_cbt('Wrapped the env with an ObsPartialWrapper.', 'c')

    return env_targ
Ejemplo n.º 3
0
def test_wrap_like_other_env(env: SimEnv):
    """
    Wrap a reference environment step by step and check that `wrap_like_other_env` makes a copy of the
    environment (running at a third of the time step) end up with the same outermost wrapper type.

    :param env: simulation environment to start from
    """

    def same_type(first, second) -> bool:
        # Exact type comparison of the two given objects
        return type(first) == type(second)

    # The copy runs at a smaller time step, so the downsampling branch can be triggered
    target = deepcopy(env)
    target.dt /= 3

    # Downsampling
    reference = DownsamplingWrapper(env, factor=3)
    assert not same_type(target, reference)
    target = wrap_like_other_env(target, reference, use_downsampling=True)
    assert same_type(target, reference)

    # Action normalization
    reference = ActNormWrapper(reference)
    assert not same_type(target, reference)
    target = wrap_like_other_env(target, reference)
    assert same_type(target, reference)

    # Observation normalization
    reference = ObsNormWrapper(reference)
    assert not same_type(target, reference)
    target = wrap_like_other_env(target, reference)
    assert same_type(target, reference)
    assert same_type(target.wrapped_env, reference.wrapped_env)

    # Running observation normalization
    reference = ObsRunningNormWrapper(reference)
    target = wrap_like_other_env(target, reference)
    assert same_type(target, reference)
    assert same_type(target.wrapped_env, reference.wrapped_env)

    # Partial observations
    reference = ObsPartialWrapper(reference, idcs=["x"])
    target = wrap_like_other_env(target, reference)
    assert same_type(target, reference)
    assert same_type(target.wrapped_env, reference.wrapped_env)
Ejemplo n.º 4
0
def test_space(mock_obs_space):
    """
    Check that the observation space of an `ObsNormWrapper` is bounded by -1 and 1 in every dimension.

    :param mock_obs_space: observation space handed to the mock environment
    """
    wrapped = ObsNormWrapper(MockEnv(obs_space=mock_obs_space))

    # Every lower bound must be -1 and every upper bound must be 1
    lower, upper = wrapped.obs_space.bounds
    assert np.all(lower == -1)
    assert np.all(upper == 1)
Ejemplo n.º 5
0
    def override_obs_bounds(bound_lo: np.ndarray, bound_up: np.ndarray,
                            labels: np.ndarray) -> (np.ndarray, np.ndarray):
        """
        Default overriding method for the bounds of an observation space. This is necessary when the observations
        are scaled with their range, e.g. to compare a deviation over different kinds of observations like position
        and angular velocity. Thus, infinite bounds are not feasible.

        The entries labeled `theta_dot` and `alpha_dot` are passed to `ObsNormWrapper.override_bounds` with the
        values -20.0 (lower bound) and 20.0 (upper bound).

        :param bound_lo: lower bound of the observation space
        :param bound_up: upper bound of the observation space
        :param labels: label for each dimension of the observation space to override
        :return: lower and upper bound as returned by `ObsNormWrapper.override_bounds`
        """
        # Symmetric finite limits for the two velocity entries
        upper_overrides = {"theta_dot": 20.0, "alpha_dot": 20.0}
        lower_overrides = {name: -limit for name, limit in upper_overrides.items()}

        new_lo = ObsNormWrapper.override_bounds(bound_lo, lower_overrides, labels)
        new_up = ObsNormWrapper.override_bounds(bound_up, upper_overrides, labels)
        return new_lo, new_up
Ejemplo n.º 6
0
def test_combination():
    """
    Stack several environment wrappers on a `QCartPoleSwingUpSim` and check their combined behavior:
    buffered domain randomization, action/observation normalization, partial observations, Gaussian action
    noise, action delay, and finally the wrapper-chain utilities.
    """

    def run(some_env):
        # Every rollout in this test uses a dummy policy, evaluation mode, seed 0, and a default render mode
        return rollout(some_env, DummyPolicy(some_env.spec), eval=True, seed=0, render_mode=RenderMode())

    env = QCartPoleSwingUpSim(dt=1/50., max_steps=20)

    # Buffered domain randomization with 3 domains
    randomizer = create_default_randomizer(env)
    env_rand = DomainRandWrapperBuffer(env, randomizer)
    env_rand.fill_buffer(num_domains=3)

    params_after = []
    for _ in range(4):
        before = env_rand.domain_param
        run(env_rand)
        after = env_rand.domain_param
        params_after.append(after)
        # Each rollout must leave the environment with different domain parameters
        assert after != before
    # The parameters after the first and after the fourth rollout are expected to be equal
    assert params_after[0] == params_after[3]

    # Normalized action and observation spaces must be bounded by -1 and 1
    lower_overrides = {'x_dot': -213., 'theta_dot': -42.}
    upper_overrides = {'x_dot': 213., 'theta_dot': 42., 'x': 0.123}
    env_norm = ObsNormWrapper(ActNormWrapper(env), explicit_lb=lower_overrides, explicit_ub=upper_overrides)
    act_lo, act_up = env_norm.act_space.bounds
    assert all(act_lo == -1)
    assert all(act_up == 1)
    obs_lo, obs_up = env_norm.obs_space.bounds
    assert all(obs_lo == -1)
    assert all(obs_up == 1)

    # Processing the un-normalized observations by hand must reproduce the normalized rollout
    ro_rand = run(env_rand)
    ro_norm = run(env_norm)
    assert np.allclose(env_norm._process_obs(ro_rand.observations), ro_norm.observations)

    # Partial observations serve as the baseline for the following two comparisons
    env_partial = ObsPartialWrapper(env_norm, idcs=['x_dot', 'cos_theta'])
    ro_partial = run(env_partial)

    # Action noise: the recorded actions are asserted to match while the observations are asserted to differ
    act_shape_ones = np.ones(env_partial.act_space.shape)
    env_noisy = GaussianActNoiseWrapper(env_partial,
                                        noise_mean=0.5*act_shape_ones,
                                        noise_std=0.1*np.ones(env_partial.act_space.shape))
    ro_noisy = run(env_noisy)
    assert np.allclose(ro_partial.actions, ro_noisy.actions)
    assert not np.allclose(ro_partial.observations, ro_noisy.observations)

    # Action delay: same expectations as for the action noise
    env_delayed = ActDelayWrapper(env_partial, delay=3)
    ro_delayed = run(env_delayed)
    assert np.allclose(ro_partial.actions, ro_delayed.actions)
    assert not np.allclose(ro_partial.observations, ro_delayed.observations)

    # Utilities operating on the wrapper chain
    assert isinstance(inner_env(env_delayed), QCartPoleSwingUpSim)
    assert typed_env(env_delayed, ObsPartialWrapper) is not None
    assert isinstance(env_delayed, ActDelayWrapper)
    env_stripped = remove_env(env_delayed, ActDelayWrapper)
    assert not isinstance(env_stripped, ActDelayWrapper)
Ejemplo n.º 7
0
def wrap_like_other_env(
        env_targ: Union[SimEnv, RealEnv],
        env_src: Union[SimEnv, EnvWrapper],
        use_downsampling: bool = False) -> Union[SimEnv, RealEnv]:
    """
    Wrap a given real environment like its simulated counterpart (except the domain randomization of course).

    For every supported wrapper type found in the source environment's chain, the target is wrapped with the same
    type unless its chain already contains one. The order is: downsampling (optional), action normalization,
    observation normalization, running observation normalization, and partial observations.

    :param env_targ: target environment e.g. environment representing the physical device
    :param env_src: source environment e.g. simulation environment used for training
    :param use_downsampling: apply a wrapper that downsamples the actions if the sampling frequencies don't match
    :return: target environment
    """
    if use_downsampling and env_src.dt > env_targ.dt:
        if typed_env(env_targ, DownsamplingWrapper) is None:
            # The quotient of two float time steps is rarely an exact integer (0.3 / 0.1 == 2.9999999999999996),
            # and a plain `int()` would then yield a factor that is one too small. Snap to the nearest integer if
            # the quotient is within round-off of it, and keep truncating genuinely fractional ratios.
            dt_ratio = env_src.dt / env_targ.dt
            nearest = int(round(dt_ratio))
            ds_factor = nearest if abs(dt_ratio - nearest) < 1e-6 else int(dt_ratio)
            env_targ = DownsamplingWrapper(env_targ, ds_factor)
            print_cbt(
                f"Wrapped the target environment with a DownsamplingWrapper of factor {ds_factor}.",
                "y")
        else:
            print_cbt(
                "The target environment was already wrapped with a DownsamplingWrapper.",
                "y")

    if typed_env(env_src, ActNormWrapper) is not None:
        if typed_env(env_targ, ActNormWrapper) is None:
            env_targ = ActNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ActNormWrapper.",
                      "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ActNormWrapper.",
                "y")

    if typed_env(env_src, ObsNormWrapper) is not None:
        if typed_env(env_targ, ObsNormWrapper) is None:
            env_targ = ObsNormWrapper(env_targ)
            print_cbt("Wrapped the target environment with an ObsNormWrapper.",
                      "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ObsNormWrapper.",
                "y")

    if typed_env(env_src, ObsRunningNormWrapper) is not None:
        if typed_env(env_targ, ObsRunningNormWrapper) is None:
            env_targ = ObsRunningNormWrapper(env_targ)
            print_cbt(
                "Wrapped the target environment with an ObsRunningNormWrapper.",
                "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ObsRunningNormWrapper.",
                "y")

    # Look the partial-observation wrapper up once and reuse its mask for the target
    src_partial = typed_env(env_src, ObsPartialWrapper)
    if src_partial is not None:
        if typed_env(env_targ, ObsPartialWrapper) is None:
            env_targ = ObsPartialWrapper(env_targ,
                                         mask=src_partial.keep_mask,
                                         keep_selected=True)
            print_cbt(
                "Wrapped the target environment with an ObsPartialWrapper.",
                "y")
        else:
            print_cbt(
                "The target environment was already wrapped with an ObsPartialWrapper.",
                "y")

    return env_targ
Ejemplo n.º 8
0
        observeTaskSpaceDiscrepancy=True,
        usePhysicsNode=True,
    )
    env = PlanarInsertSim(**env_hparams)
    # Explicit normalization bounds
    elb = {
        'DiscrepDS_Effector_X': -1.,
        'DiscrepDS_Effector_Z': -1.,
        'DiscrepDS_Effector_Bd': -1,
    }
    eub = {
        'DiscrepDS_Effector_X': 1.,
        'DiscrepDS_Effector_Z': 1.,
        'DiscrepDS_Effector_Bd': 1,
    }
    env = ObsNormWrapper(env, explicit_lb=elb, explicit_ub=eub)

    randomizer = get_default_randomizer(env)
    # randomizer = get_empty_randomizer()
    env = ActDelayWrapper(env)
    randomizer.add_domain_params(UniformDomainParam(name='act_delay', mean=2, halfspan=2, clip_lo=0, roundint=True))
    env = DomainRandWrapperLive(env, randomizer)

    # Policy
    policy_hparam = dict(
        obs_layer=FNN(input_size=env.obs_space.flat_dim,
                      output_size=env.act_space.flat_dim,
                      hidden_sizes=[32, 32],
                      hidden_nonlin=to.tanh,
                      dropout=0.),
        tau_init=5.,
Ejemplo n.º 9
0
def adn_variant(dt,
                max_steps,
                max_dist_force,
                physics_engine,
                normalize_obs=True,
                obsnorm_cpp=True):
    """
    Run one rollout of a randomly initialized `ADNPolicy` in a `Planar3LinkTASim` and plot the potentials.

    :param dt: time step, passed to the simulation as well as to the policy
    :param max_steps: forwarded to the simulation as `max_steps`
    :param max_dist_force: forwarded to the simulation as `max_dist_force`
    :param physics_engine: forwarded to the simulation as `physicsEngine`
    :param normalize_obs: if `True`, the observations are normalized using the explicit bounds defined below
    :param obsnorm_cpp: if `True`, the normalization is requested from the simulation via keyword arguments,
                        otherwise the environment is wrapped with an `ObsNormWrapper`
    :return: the recorded rollout
    """
    pyrado.set_seed(1001)

    # Explicit normalization bounds
    lower_overrides = {
        'EffectorLoadCell_Fx': -100.,
        'EffectorLoadCell_Fz': -100.,
        'Effector_Xd': -1,
        'Effector_Zd': -1,
        'GD_DS0d': -1,
        'GD_DS1d': -1,
        'GD_DS2d': -1,
    }
    upper_overrides = {
        'GD_DS0': 3.,
        'GD_DS1': 3,
        'GD_DS2': 3,
        'EffectorLoadCell_Fx': 100.,
        'EffectorLoadCell_Fz': 100.,
        'Effector_Xd': .5,
        'Effector_Zd': .5,
        'GD_DS0d': .5,
        'GD_DS1d': .5,
        'GD_DS2d': .5,
        'PredCollCost_h50': 1000.
    }

    # Collect the simulation arguments; normalization arguments are appended only if the simulation handles it
    sim_kwargs = dict(physicsEngine=physics_engine,
                      dt=dt,
                      max_steps=max_steps,
                      max_dist_force=max_dist_force,
                      collisionAvoidanceIK=True,
                      taskCombinationMethod='sum')
    wrap_afterwards = False
    if normalize_obs:
        if obsnorm_cpp:
            sim_kwargs.update(normalizeObservations=True,
                              obsNormOverrideLower=lower_overrides,
                              obsNormOverrideUpper=upper_overrides)
        else:
            wrap_afterwards = True

    # Set up environment
    env = Planar3LinkTASim(**sim_kwargs)
    if wrap_afterwards:
        env = ObsNormWrapper(env, explicit_lb=lower_overrides, explicit_ub=upper_overrides)

    # Set up random policy
    policy = ADNPolicy(spec=env.spec,
                       dt=dt,
                       tau_init=0.2,
                       activation_nonlin=to.sigmoid,
                       potentials_dyn_fcn=pd_cubic)
    print_cbt('Running ADNPolicy with random initialization', 'c', bright=True)

    # Simulate and plot potentials
    ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
    plot_potentials(ro)
    return ro
Ejemplo n.º 10
0
def create_adn_setup(dt,
                     max_steps,
                     max_dist_force,
                     physics_engine,
                     normalize_obs=True,
                     obsnorm_cpp=True):
    """
    Run one rollout of a randomly initialized `ADNPolicy` in a `Planar3LinkTASim` (with position tasks and
    task space discrepancy observations enabled) and plot the potentials.

    :param dt: time step, forwarded to the simulation as `dt`
    :param max_steps: forwarded to the simulation as `max_steps`
    :param max_dist_force: forwarded to the simulation as `max_dist_force`
    :param physics_engine: forwarded to the simulation as `physicsEngine`
    :param normalize_obs: if `True`, the observations are normalized using the explicit bounds defined below
    :param obsnorm_cpp: if `True`, the normalization is requested from the simulation via keyword arguments,
                        otherwise the environment is wrapped with an `ObsNormWrapper`
    :return: the recorded rollout
    """
    pyrado.set_seed(0)

    # Explicit normalization bounds
    lower_overrides = {
        "EffectorLoadCell_Fx": -100.0,
        "EffectorLoadCell_Fz": -100.0,
        "Effector_Xd": -1,
        "Effector_Zd": -1,
        "GD_DS0d": -1,
        "GD_DS1d": -1,
        "GD_DS2d": -1,
    }
    upper_overrides = {
        "GD_DS0": 3.0,
        "GD_DS1": 3,
        "GD_DS2": 3,
        "EffectorLoadCell_Fx": 100.0,
        "EffectorLoadCell_Fz": 100.0,
        "Effector_Xd": 0.5,
        "Effector_Zd": 0.5,
        "GD_DS0d": 0.5,
        "GD_DS1d": 0.5,
        "GD_DS2d": 0.5,
        "PredCollCost_h50": 1000.0,
    }

    # Collect the simulation arguments; normalization arguments are appended only if the simulation handles it
    sim_kwargs = dict(
        physicsEngine=physics_engine,
        dt=dt,
        max_steps=max_steps,
        max_dist_force=max_dist_force,
        positionTasks=True,
        collisionAvoidanceIK=True,
        taskCombinationMethod="sum",
        observeTaskSpaceDiscrepancy=True,
    )
    wrap_afterwards = False
    if normalize_obs:
        if obsnorm_cpp:
            sim_kwargs.update(
                normalizeObservations=True,
                obsNormOverrideLower=lower_overrides,
                obsNormOverrideUpper=upper_overrides,
            )
        else:
            wrap_afterwards = True

    # Set up environment
    env = Planar3LinkTASim(**sim_kwargs)
    if wrap_afterwards:
        env = ObsNormWrapper(env, explicit_lb=lower_overrides, explicit_ub=upper_overrides)

    # Set up random policy
    policy = ADNPolicy(
        spec=env.spec,
        tau_init=10.0,
        activation_nonlin=to.sigmoid,
        potentials_dyn_fcn=pd_cubic,
    )
    print_cbt("Running ADNPolicy with random initialization", "c", bright=True)

    # Simulate and plot potentials
    ro = rollout(env, policy, render_mode=RenderMode(video=True), stop_on_done=True)
    plot_potentials(ro)
    return ro
Ejemplo n.º 11
0
def test_combination(env: SimEnv):
    """
    Stack several environment wrappers on the given simulation environment and check their combined behavior:
    buffered domain randomization, action/observation normalization, partial observations, Gaussian action
    noise, action delay, and finally the wrapper-chain utilities.

    :param env: simulation environment to wrap, its `max_steps` attribute is set to 20
    """

    def run(some_env):
        # Every rollout in this test uses a dummy policy, evaluation mode, seed 0, and a default render mode
        return rollout(some_env,
                       DummyPolicy(some_env.spec),
                       eval=True,
                       seed=0,
                       render_mode=RenderMode())

    pyrado.set_seed(0)
    env.max_steps = 20

    # Buffered domain randomization with 3 domains
    randomizer = create_default_randomizer(env)
    env_rand = DomainRandWrapperBuffer(env, randomizer)
    env_rand.fill_buffer(num_domains=3)

    params_after = []
    for _ in range(4):
        before = env_rand.domain_param
        run(env_rand)
        after = env_rand.domain_param
        params_after.append(after)
        # Each rollout must leave the environment with different domain parameters
        assert after != before
    # The parameters after the first and after the fourth rollout are expected to be equal
    assert params_after[0] == params_after[3]

    # Normalized action and observation spaces must be bounded by -1 and 1
    lower_overrides = {"x_dot": -213.0, "theta_dot": -42.0}
    upper_overrides = {"x_dot": 213.0, "theta_dot": 42.0, "x": 0.123}
    env_norm = ObsNormWrapper(ActNormWrapper(env),
                              explicit_lb=lower_overrides,
                              explicit_ub=upper_overrides)
    act_lo, act_up = env_norm.act_space.bounds
    assert all(act_lo == -1)
    assert all(act_up == 1)
    obs_lo, obs_up = env_norm.obs_space.bounds
    assert all(obs_lo == -1)
    assert all(obs_up == 1)

    # Processing the un-normalized observations by hand must reproduce the normalized rollout
    ro_rand = run(env_rand)
    ro_norm = run(env_norm)
    assert np.allclose(env_norm._process_obs(ro_rand.observations),
                       ro_norm.observations)

    # Partial observations selected via the third and fourth observation label
    labels = env.obs_space.labels
    env_partial = ObsPartialWrapper(env_norm, idcs=[labels[2], labels[3]])
    ro_partial = run(env_partial)

    # The action noise is expected to change the rollout
    env_noisy = GaussianActNoiseWrapper(
        env_partial,
        noise_mean=0.5 * np.ones(env_partial.act_space.shape),
        noise_std=0.1 * np.ones(env_partial.act_space.shape))
    ro_noisy = run(env_noisy)
    assert not np.allclose(ro_partial.observations, ro_noisy.observations)

    # Action delay: the recorded actions are asserted to match while the observations are asserted to differ
    env_delayed = ActDelayWrapper(env_partial, delay=3)
    ro_delayed = run(env_delayed)
    assert np.allclose(ro_partial.actions, ro_delayed.actions)
    assert not np.allclose(ro_partial.observations, ro_delayed.observations)

    # Utilities operating on the wrapper chain
    assert type(inner_env(env_delayed)) == type(env)
    assert typed_env(env_delayed, ObsPartialWrapper) is not None
    assert isinstance(env_delayed, ActDelayWrapper)
    env_stripped = remove_env(env_delayed, ActDelayWrapper)
    assert not isinstance(env_stripped, ActDelayWrapper)