def test_observation_wrapper_order():
    """Make sure observation noise wrappers are applied in the correct order.

    A ``RandomizeObservationWrapper`` is applied first and an
    ``ObservationDelayWrapper`` is stacked on top of it; stepping the
    resulting env is expected to raise ``AssertionError``.
    """
    base_env = make_simple_env()
    base_env.reset()

    def fixed_observation():
        # A fresh dict (and array) is built on every call.
        return {"cube_pos": np.array([0.1, 0.2, 0.3])}

    base_env.observe = fixed_observation

    noise_levels = {"cube_pos": {"uncorrelated": 0.2, "additive": 0.1}}
    noisy_env = RandomizeObservationWrapper(env=base_env, levels=noise_levels)
    noisy_env.reset()

    delay_levels = {
        "interpolators": {},
        "groups": {
            "vision": {"obs_names": ["cube_pos"], "mean": 1.5, "std": 0.0},
        },
    }
    delayed_env = ObservationDelayWrapper(noisy_env, levels=delay_levels)

    with pytest.raises(AssertionError):
        delayed_env.step(np.zeros(delayed_env.action_space.shape))
def test_randomize_observation_wrapper():
    """Check which value ``RandomizeObservationWrapper`` adds its noise to.

    ``randn`` on the wrapper's ``random_state`` is patched to return ones, so
    the expected noisy values can be asserted exactly.
    """
    base_env = make_simple_env()
    base_env.reset()

    noise_levels = {"cube_pos": {"uncorrelated": 0.2, "additive": 0.1}}
    env = RandomizeObservationWrapper(env=base_env, levels=noise_levels)

    def unit_noise(key_length):
        # Deterministic stand-in for randn: every sample is exactly 1.
        return np.ones(key_length, dtype=np.float32)

    with patch.object(env, "random_state") as fake_random_state:
        fake_random_state.randn.side_effect = unit_noise

        # Only the noiseless value is observed: noise goes on top of it.
        clean_only = {"cube_pos": np.array([0.1, 0.2, 0.3])}
        base_env.observe = lambda: clean_only

        noisy_cube_pos = env.reset()["noisy_cube_pos"]
        assert_almost_equal(noisy_cube_pos, [0.4, 0.5, 0.6])

        # A noisy observation is already available: noise goes on top of
        # that one instead of the noiseless value.
        clean_and_noisy = {
            "cube_pos": np.array([0.1, 0.2, 0.3]),
            "noisy_cube_pos": np.array([0.2, 0.3, 0.4]),
        }
        base_env.observe = lambda: clean_and_noisy

        noisy_cube_pos = env.reset()["noisy_cube_pos"]
        assert_almost_equal(noisy_cube_pos, [0.5, 0.6, 0.7])
Exemple #3
0
def test_observe():
    """Check that the env's observation matches the simulation state."""
    env = make_simple_env()
    env.reset()
    sim = env.mujoco_simulation

    # Capture the observation before touching qpos/qvel below.
    observed = env.observe()

    # The expected qpos/qvel have the target joints zeroed out.
    expected_qpos = sim.qpos
    expected_qpos[sim.qpos_idxs["target_all_joints"]] = 0.0
    expected_qvel = sim.qvel
    expected_qvel[sim.qvel_idxs["target_all_joints"]] = 0.0

    expected = {
        "cube_pos": sim.get_qpos("cube_position"),
        "cube_quat": rotation.quat_normalize(sim.get_qpos("cube_rotation")),
        "hand_angle": sim.get_qpos("hand_angle"),
        "fingertip_pos": (
            sim.shadow_hand.observe().fingertip_positions().flatten()
        ),
        "qpos": expected_qpos,
        "qvel": expected_qvel,
    }

    for name, want in expected.items():
        assert np.allclose(
            observed[name], want
        ), f"Value for obs {name} {observed[name]} doesn't match true value {want}."
Exemple #4
0
def test_make_simple_env():
    """Build an env with vision cube appearance and count its sticker geoms."""
    sim_params = dict(cube_appearance="vision", hide_target=True)
    env = make_simple_env(parameters={"simulation_params": sim_params})
    env.reset()

    # There is no wrapper, so `sim` is read straight off the env.
    geom_names = env.sim.model.geom_names
    sticker_count = sum(
        1 for name in geom_names if name.startswith("cube:sticker:"))

    assert sticker_count == 9 * 6
def test_action_delay_wrapper_inactive():
    """With zero delay and zero std, the wrapped env must track the plain one.

    Both envs are built from the same seed and driven with the same action
    for 20 steps; the final observations are then compared key by key.
    """
    reference_env = make_simple_env(starting_seed=0)
    reference_env.reset()

    # No explicit reset for the wrapped env: the wrapper calls reset in its
    # __init__.
    wrapped_env = ActionDelayWrapper(
        make_simple_env(starting_seed=0),
        delay=0.0,
        per_episode_std=0.0,
        per_step_std=0.0,
        random_state=np.random.RandomState(),
    )

    action = reference_env.action_space.sample()
    for _ in range(20):
        reference_obs, _, _, _ = reference_env.step(action)
        wrapped_obs, _, _, _ = wrapped_env.step(action)

    # Only the observations from the last step are compared.
    for name in reference_obs:
        abs_diff = np.abs(reference_obs[name] - wrapped_obs[name])
        mean_abs_diff = np.mean(abs_diff)
        assert mean_abs_diff < 1e-6, "ActionDelayWrapper should be inactive."
def test_randomized_broken_actuator_wrapper():
    """Exercise ``RandomizedBrokenActuatorWrapper`` resets and action masking.

    Checks that at most ``max_broken_actuators`` actuators are broken, that
    the broken set changes across a reset, and that an all-ones action comes
    back as 0 on broken actuators and 1 everywhere else.
    """
    base_env = make_simple_env()
    base_env.reset()

    env = RandomizedBrokenActuatorWrapper(
        env=base_env,
        proba_broken=0.5,
        max_broken_actuators=4,
        uncorrelated=0.0,
    )
    env.reset()
    assert len(env._broken_aids) <= 4

    # The broken actuators are different after reset.
    # NOTE(review): this presumably relies on the env's seeding making the two
    # draws differ -- confirm it cannot flake.
    previously_broken = env._broken_aids.copy()
    env.reset()
    assert sorted(env._broken_aids) != sorted(previously_broken)

    # The action is modified: broken actuators read 0, the others stay at 1.
    all_ones = np.ones(env.action_space.shape)
    modified = env.action(all_ones).copy()
    for actuator_id in range(env.action_space.shape[0]):
        expected = 0.0 if actuator_id in env._broken_aids else 1.0
        assert modified[actuator_id] == expected
def test_wrapper_divergence():
    """
    Run the same action in the vanilla dactyl_locked env and in copies of it
    that are each wrapped in one of the wrappers under test. After some steps,
    every wrapped env should have diverged from the vanilla version.

    A second, unwrapped env built from the same seed is stepped alongside as a
    control to make sure the base env itself is deterministic.
    """
    env_kwargs = {
        "n_random_initial_steps": 0,
    }

    simple_env = make_simple_env(parameters=env_kwargs, starting_seed=0)
    # Should be the exact same as `simple_env`.
    dummy_env = make_simple_env(parameters=env_kwargs, starting_seed=0)

    # Add your wrappers here!
    wrappers_to_test = [
        (ActionNoiseWrapper, {}),
        (BacklashWrapper, {}),
        (FingersOccludedPhasespaceMarkers, {}),  # Need 'noisy_fingertip_pos'
        (FingersFreezingPhasespaceMarkers, {}),  # Need 'noisy_fingertip_pos'
        (
            RandomizedBrokenActuatorWrapper,
            {
                "proba_broken": 1.0,  # force one broken actuator
                "max_broken_actuators": 1,
            },
        ),
        (RandomizedRobotFrictionWrapper, {}),
        (RandomizedCubeFrictionWrapper, {}),
        (RandomizedGravityWrapper, {}),
        (RandomizedJointLimitWrapper, {}),
        (RandomizedTendonRangeWrapper, {}),
        (RandomizedPhasespaceFingersWrapper, {}),
        (RandomizedRobotDampingWrapper, {}),
        (RandomizedRobotKpWrapper, {}),
        (RandomizedTimestepWrapper, {}),
        (ActionDelayWrapper, {}),
        # With default args, the maximum qpos difference is too small.
        (RandomizedActionLatency, {
            "max_delay": 2
        }),  # default 1
        # (RandomizedBodyInertiaWrapper, {}),  # default mass_range=[0.5, 1.5]
    ]

    # These wrappers need 'noisy_fingertip_pos', so the env is first wrapped
    # in a RandomizeObservationWrapper for "fingertip_pos".
    needs_noisy_fingertips = (
        FingersOccludedPhasespaceMarkers,
        FingersFreezingPhasespaceMarkers,
    )

    wrapped_envs = []
    for wrapper_class, kwargs in wrappers_to_test:
        wrapped_env = make_simple_env(parameters=env_kwargs, starting_seed=0)

        if wrapper_class in needs_noisy_fingertips:
            wrapped_env = RandomizeObservationWrapper(
                env=wrapped_env,
                levels={
                    "fingertip_pos": {
                        "uncorrelated": 0.002,
                        "additive": 0.001
                    }
                },
            )

        wrapped_env = wrapper_class(env=wrapped_env, **kwargs)
        wrapped_env.reset()
        wrapped_envs.append(wrapped_env)

    # Take the action shape from the base env rather than from a loop
    # variable left over from the construction loop above.
    action_shape = simple_env.action_space.shape
    for _ in range(200):
        # A fresh array every step, shared by all envs within that step.
        action = np.ones(action_shape)
        simple_env.step(action)
        dummy_env.step(action)
        for wrapped_env in wrapped_envs:
            wrapped_env.step(action)

    # Compare every qpos entry except the ones of joints prefixed "target:".
    target_qpos_idxs = joint_qpos_ids_from_prefix(
        simple_env.unwrapped.sim.model, "target:")
    kept_indices = set(range(
        simple_env.unwrapped.sim.data.qpos.shape[0])) - set(target_qpos_idxs)
    kept_indices = sorted(kept_indices)

    def get_non_target_qpos(_env):
        return np.array(_env.unwrapped.sim.data.qpos.copy()[kept_indices])

    # Make sure the base env is deterministic
    assert np.array_equal(get_non_target_qpos(simple_env),
                          get_non_target_qpos(dummy_env))

    for wrapped_env in wrapped_envs:
        diffs = np.absolute(
            get_non_target_qpos(simple_env) - get_non_target_qpos(wrapped_env))
        wrapper_name = wrapped_env.__class__.__name__
        # Some entry must differ noticeably, and no entry may be identical.
        assert np.max(diffs) > 1e-4, f"failed for {wrapper_name}"
        assert np.min(diffs) > 0.0, f"failed for {wrapper_name}"
def test_observation_delay_wrapper():
    """Check the delayed observations produced by ``ObservationDelayWrapper``.

    The underlying env's ``observe`` is stubbed with two hand-written
    observations. The first step is expected to return the initial
    observation, the second step an interpolation between the two.
    """
    delay_groups = {
        "vision": {
            "obs_names": ["cube_pos", "cube_quat"],
            "mean": 1.5,
            "std": 0.0,
        },
        "giiker": {"obs_names": ["cube_face_angle"], "mean": 1.4, "std": 0.0},
        "phasespace": {"obs_names": ["fingertip_pos"], "mean": 1.2, "std": 0.0},
    }
    levels = {
        "interpolators": {
            "cube_quat": "QuatInterpolator",
            "cube_face_angle": "RadianInterpolator",
        },
        "groups": delay_groups,
    }

    base_env = make_simple_env()
    base_env.reset()

    env = ObservationDelayWrapper(base_env, levels)

    def zero_action():
        # A new all-zero action for every step.
        return np.zeros(env.action_space.shape)

    first_obs = {
        "cube_pos": np.array([0.1, 0.2, 0.3]),
        "cube_quat": rotation.euler2quat(np.array([0.0, 0.0, 0.0])),
        "cube_face_angle": np.array(
            [np.pi - 0.01, np.pi / 2 - 0.01, 0.0, 0.0, 0.0, 0.0]),
        "fingertip_pos": np.array([0.5, 0.6, 0.7]),
    }
    base_env.observe = lambda: first_obs

    env.reset()

    later_obs = {
        "cube_pos": np.array([0.2, 0.3, 0.4]),
        "cube_quat": rotation.euler2quat(np.array([0.8, 0.0, 0.0])),
        "cube_face_angle": np.array(
            [-np.pi + 0.01, np.pi / 2 + 0.01, 0.0, 0.0, 0.0, 0.0]),
        "fingertip_pos": np.array([0.5, 0.6, 0.7]),
    }
    base_env.observe = lambda: later_obs

    delayed = env.step(zero_action())[0]

    # Should take the first observation because there are only two
    # observations and nothing to interpolate.
    for name, expected in first_obs.items():
        assert_almost_equal(delayed[f"noisy_{name}"], expected)

    # Step env again so obs should be interpolation of first and later obs.
    delayed = env.step(zero_action())[0]

    assert_almost_equal(delayed["noisy_cube_pos"], [0.15, 0.25, 0.35])

    delayed_euler = rotation.quat2euler(delayed["noisy_cube_quat"])
    assert_almost_equal(delayed_euler, [0.4, 0.0, 0.0])

    expected_face_angle = [
        -np.pi + 0.002, np.pi / 2 + 0.002, 0.0, 0.0, 0.0, 0.0
    ]
    assert_almost_equal(delayed["noisy_cube_face_angle"], expected_face_angle)

    assert_almost_equal(delayed["noisy_fingertip_pos"], [0.5, 0.6, 0.7])