Example 1
import numpy as np
import torch

from rllib.environment import GymEnvironment


def test_reward(environment, action_cost, action_type):
    """Check that the reward model reproduces the environment's reward signal."""
    env_name, reward_model_ = environment
    if action_cost is not None:
        env = GymEnvironment(env_name, action_cost=action_cost)
    else:
        env = GymEnvironment(env_name)
    state = env.reset()
    if action_cost is not None:
        reward_model = reward_model_(action_cost=action_cost)
    else:
        reward_model = reward_model_()
    reward_model.set_goal(env.goal)
    # Roll out the environment and compare rewards at every step.
    for _ in range(50):
        if action_type == "random":
            action = env.action_space.sample()
        elif action_type == "zero":
            action = np.zeros(env.dim_action)
        else:
            raise NotImplementedError

        next_state, reward, done, info = env.step(action)
        # Goal-conditioned reward models expect the goal appended to the state.
        if env.goal is not None:
            state = np.concatenate((state, env.goal))
        np.testing.assert_allclose(reward,
                                   reward_model(state, action, next_state)[0],
                                   rtol=1e-3,
                                   atol=1e-6)

        # The reward model should also handle a batch of stacked transitions.
        np.testing.assert_allclose(
            np.tile(reward, (5, )),
            reward_model(
                np.tile(state, (5, 1)),
                np.tile(action, (5, 1)),
                np.tile(next_state, (5, 1)),
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )

        # Repeat the same checks with torch tensors as inputs.
        state = torch.tensor(state, dtype=torch.get_default_dtype())
        action = torch.tensor(action, dtype=torch.get_default_dtype())
        next_state = torch.tensor(next_state, dtype=torch.get_default_dtype())
        np.testing.assert_allclose(reward,
                                   reward_model(state, action, next_state)[0],
                                   rtol=1e-3,
                                   atol=1e-6)

        np.testing.assert_allclose(
            np.tile(reward, (5, 1)),
            reward_model(state.repeat(5, 1), action.repeat(5, 1),
                         next_state.repeat(5, 1))[0],
            rtol=1e-3,
            atol=1e-6,
        )

        state = next_state.numpy()
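The environment, action_cost, and action_type arguments are pytest fixtures supplied elsewhere in the test suite; an environment fixture would analogously yield (env_name, reward_model_class) pairs such as ("MBRLReacher3D-v0", ReacherReward) from Example 3. A minimal sketch of how the scalar fixtures could be parametrized, with illustrative parameter values that are assumptions rather than the project's actual configuration:

import pytest


@pytest.fixture(params=[None, 0.1])
def action_cost(request):
    # None exercises the environment's default action cost.
    return request.param


@pytest.fixture(params=["random", "zero"])
def action_type(request):
    return request.param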
Example 2
    def test_set_state_np(self):
        """Setting the state from a numpy array should be reflected after a step."""
        env = GymEnvironment("VPendulum-v0")
        env.reset()

        state, action = self.state_action
        action = np.round(action)

        env.state = state
        obs, _, _, _ = env.step(action)
        state = env.state
        np.testing.assert_allclose(obs, state)
Example 3
import numpy as np
import torch

from rllib.environment import GymEnvironment


def test_tolerance(action_cost):
    """Check the sparse (tolerance-based) Reacher reward model against the environment."""
    # ReacherReward is assumed to be imported from the project's reward models.
    env_name, reward_model_ = ("MBRLReacher3D-v0", ReacherReward)
    if action_cost is not None:
        env = GymEnvironment(env_name, action_cost=action_cost, sparse=True)
    else:
        env = GymEnvironment(env_name, sparse=True)
    state = env.reset()
    if action_cost is not None:
        reward_model = reward_model_(action_cost=action_cost, sparse=True)
    else:
        reward_model = reward_model_(sparse=True)
    reward_model.set_goal(env.goal)

    # Roll out and compare the sparse reward for single and batched transitions.
    for _ in range(50):
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)
        if env.goal is not None:
            state = np.concatenate((state, env.goal))
        np.testing.assert_allclose(reward,
                                   reward_model(state, action, next_state)[0],
                                   rtol=1e-3,
                                   atol=1e-6)

        np.testing.assert_allclose(
            np.tile(reward, (5, )),
            reward_model(
                np.tile(state, (5, 1)),
                np.tile(action, (5, 1)),
                np.tile(next_state, (5, 1)),
            )[0],
            rtol=1e-3,
            atol=1e-6,
        )

        # Repeat the same checks with torch tensors as inputs.
        state = torch.tensor(state, dtype=torch.get_default_dtype())
        action = torch.tensor(action, dtype=torch.get_default_dtype())
        next_state = torch.tensor(next_state, dtype=torch.get_default_dtype())
        np.testing.assert_allclose(reward,
                                   reward_model(state, action, next_state)[0],
                                   rtol=1e-3,
                                   atol=1e-6)

        np.testing.assert_allclose(
            np.tile(reward, (5, 1)),
            reward_model(state.repeat(5, 1), action.repeat(5, 1),
                         next_state.repeat(5, 1))[0],
            rtol=1e-3,
            atol=1e-6,
        )

        state = next_state.numpy()
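Here sparse=True selects a tolerance-style reward: the distance term saturates at its maximum once the end effector is within a small margin of the goal, instead of varying smoothly with distance everywhere. A generic sketch of that idea follows; it is an illustration only, not the project's ReacherReward implementation, and the margin value is an assumption.

import numpy as np


def tolerance_reward(distance, margin=0.05):
    """Return 1.0 inside the margin and decay smoothly (Gaussian tail) outside it."""
    outside = np.exp(-0.5 * ((distance - margin) / margin) ** 2)
    return np.where(distance <= margin, 1.0, outside)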
Example 4
                "probability": 0.5,
                "reward": reward
            })
        # Each of the eight leaf states returns to state 0 with zero reward,
        # regardless of the action taken.
        for j in range(8):
            for a in range(2):
                transitions[(3 + j, a)].append({
                    "next_state": 0,
                    "probability": 1.0,
                    "reward": 0
                })
        return transitions


if __name__ == "__main__":
    from rllib.environment import GymEnvironment
    from rllib.environment.utilities import transitions2kernelreward

    import qreps  # noqa: F401

    env = GymEnvironment("WideTree-v0", reward=1)
    # Convert the tabular transition dictionary into dense kernel and reward arrays.
    kernel, reward = transitions2kernelreward(env.env.transitions,
                                              env.num_states, env.num_actions)
    print(kernel, reward)
    state = env.reset()
    print(state)
    # Roll out a few random steps and print the observed transitions.
    for i in range(10):
        action = env.action_space.sample()
        next_state, r, done, f = env.step(action)
        print(state, action, next_state, r, done)
        state = next_state
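transitions2kernelreward collapses the tabular transition dictionary built above, keyed by (state, action) with lists of {"next_state", "probability", "reward"} entries, into dense arrays. Below is a minimal sketch of that conversion under those assumptions; the actual utility in rllib.environment.utilities may differ in detail.

import numpy as np


def transitions_to_kernel_reward(transitions, num_states, num_actions):
    """Build a transition kernel P[s, a, s'] and expected rewards r[s, a]."""
    kernel = np.zeros((num_states, num_actions, num_states))
    reward = np.zeros((num_states, num_actions))
    for (state, action), outcomes in transitions.items():
        for outcome in outcomes:
            p = outcome["probability"]
            kernel[state, action, outcome["next_state"]] += p
            reward[state, action] += p * outcome["reward"]
    return kernel, reward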