def __init__(self, path: str, device: str):
        """Rebuild the saved SAC actor for the Ant-v4 DIAYN setup from a checkpoint.

        A ``hyperparams.json`` file is read from the directory that contains
        ``path``. Its ``"diayn"`` section configures the ``DiscreteDiayn`` model
        and its ``"sac"`` section sizes the actor, after which the actor weights
        stored under ``checkpoint["sac"]["actor"]["state_dict"]`` are loaded.

        Args:
            path: Checkpoint file handed to ``torch.load``.
            device: Device passed to ``make_env`` and to which the actor is moved.

        Sets:
            self.ns: Value of ``hyperparams["diayn"]["num_skills"]``.
            self.forward_policy: The restored ``TanhGaussianActor`` on ``device``.
            self.device: The ``device`` argument, stored unchanged.
        """
        checkpoint_path = Path(path).absolute()
        # JSON is UTF-8 by specification, so do not rely on the locale default.
        config_path = checkpoint_path.parent / "hyperparams.json"
        with open(config_path, "r", encoding="utf-8") as f:
            hyperparams = json.load(f)

        # Deserialize onto the CPU; the actor is moved to `device` further down.
        # NOTE(review): depending on the torch version, torch.load may unpickle
        # arbitrary objects -- only load checkpoints from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        diayn_params = hyperparams["diayn"]
        self.ns = diayn_params["num_skills"]
        env = make_env("Ant-v4", device)

        diayn = DiscreteDiayn(
            env.observation_space,
            diayn_params["model"]["hidden_dim"],
            self.ns,
            diayn_params["reward_weight"],
            _truncate=diayn_params["truncate"],
        )
        # The actor is sized from the wrapped env's spaces, not the raw env's.
        env = DiaynWrapper(env, diayn)
        self.forward_policy = TanhGaussianActor(
            obs_spec=env.observation_space,
            act_spec=env.action_space,
            hidden_dim=hyperparams["sac"]["actor"]["policy"]["hidden_dim"],
        )
        self.forward_policy.load_state_dict(checkpoint["sac"]["actor"]["state_dict"])
        self.forward_policy.to(device)
        self.device = device
def test_video_wrapper():
    """Step a video-wrapped HalfCheetah env and check the recorded video is 4-D."""
    num_steps = 49

    recorder = VideoWrapper(make_env("HalfCheetah-v2", "cpu"))
    recorder.reset()

    for _ in range(num_steps):
        sampled = recorder.action_space.sample()
        # A leading dimension of size 1 is added before stepping.
        batched_action = torch.tensor(sampled).unsqueeze(0)
        recorder.step(batched_action)

    video = recorder.get_video_and_clear()
    assert video.dim() == 4
def test_critic():
    """Smoke-test ``DqnCritic`` on random CartPole samples on each usable device.

    The critic is called 300 times per device with a freshly sampled observation
    and action. The CUDA pass runs only when CUDA is available, so the test does
    not error out on CPU-only machines.
    """
    import torch  # local import: only needed for the CUDA availability probe

    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")

    for device in devices:
        env = make_env("CartPole-v1", device)
        critic = DqnCritic(env.observation_space, env.action_space, [256, 256],
                           device)
        for _ in range(300):
            critic(
                torchify(env.observation_space.sample(), device),
                torchify(env.action_space.sample(), device),
            )
# Example no. 4
# 0
def diayn_test(diayn):
    """Exercise a DIAYN implementation on InvertedPendulum on each usable device.

    For every device the object is built, a sampled value is stored under
    ``obs["diayn"]``, and ``calc_rewards`` and ``train`` are called on that
    observation. The object must also be a ``Loggable``; its hyperparameter and
    epoch logs are printed. The CUDA pass runs only when CUDA is available, so
    the helper does not error out on CPU-only machines.

    Args:
        diayn: Callable invoked as
            ``diayn(observation_space, [256, 256], 10, 3, 128, _device=dev)``
            (presumably a DIAYN class -- confirm against callers).
    """
    import torch  # local import: only needed for the CUDA availability probe

    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")

    for dev in devices:
        env = make_env("InvertedPendulum-v2", dev)
        dd = diayn(env.observation_space, [256, 256], 10, 3, 128, _device=dev)
        obs = env.reset()
        # Attach the output of dd.sample(1) to the observation before use.
        obs["diayn"] = dd.sample(1)
        print(obs)
        dd.calc_rewards(obs)
        dd.train(obs)

        assert isinstance(dd, Loggable)
        print(dd.log_hyperparams())
        print(dd.log_epoch())
def test_actor():
    """Check that ``DqnActor`` picks actions the critic rates at least as high as random ones.

    For each usable device, 300 batches of 20 sampled observations are fed to
    the actor; the critic's value for the actor's actions must be greater than
    or equal to its value for 20 randomly sampled actions, element-wise. The
    CUDA pass runs only when CUDA is available, so the test does not error out
    on CPU-only machines.
    """
    batch_size = 20

    devices = ["cpu"]
    if torch.cuda.is_available():
        devices.append("cuda")

    for device in devices:
        env = make_env("CartPole-v1", device)
        critic = DqnCritic(env.observation_space, env.action_space, [256, 256],
                           device)
        actor = DqnActor(critic, env.observation_space, env.action_space,
                         device)
        for _ in range(300):
            obs = collate([
                torchify(env.observation_space.sample(), device)
                for _ in range(batch_size)
            ])
            act = actor(obs)
            rand_act = torch.cat([
                torchify(env.action_space.sample(), device)
                for _ in range(batch_size)
            ])
            assert torch.all(critic(obs, act) >= critic(obs, rand_act))
def abstract_test_diayn(diayn_class, device):
    """Shared checks for a DIAYN implementation wrapped around InvertedPendulum.

    Three properties of the ``DiaynWrapper`` observations are asserted:
    the ``"diayn"`` entry is on the same device as ``"observations"``,
    two consecutive resets do not yield fully identical ``"diayn"`` entries,
    and the ``"diayn"`` entry stays equal to the reset value for 1000 steps.

    Args:
        diayn_class: Callable invoked as
            ``diayn_class(observation_space, [256, 256], 1000, 2.0, _device=device)``.
        device: Device passed to ``make_env`` and to ``diayn_class``.
    """
    rollout_steps = 1000

    base_env = make_env("InvertedPendulum-v2", device)
    skill_model = diayn_class(
        base_env.observation_space,
        [256, 256],
        1000,
        2.0,
        _device=device,
    )
    env = DiaynWrapper(base_env, skill_model)

    # Device check: both observation entries must be on the same device.
    initial = env.reset()
    assert initial["diayn"].device == initial["observations"].device

    # Reset check: back-to-back resets must not give identical "diayn" entries.
    first_reset = env.reset()
    second_reset = env.reset()
    assert not torch.all(first_reset["diayn"] == second_reset["diayn"])

    # Rollout check: the "diayn" entry must not change while stepping.
    start = env.reset()
    for _ in range(rollout_steps):
        sampled = env.action_space.sample()
        action = torch.tensor(sampled).unsqueeze(0)
        stepped, _reward, _done, _info = env.step(action)
        assert torch.all(start["diayn"] == stepped["diayn"])