Exemple #1
0
    def test_get_action(self, obs_dim, task_num, latent_dim, action_dim):
        """Check that every action query returns actions in the action space.

        The policy is queried in three ways -- with an explicit latent, with
        a task vector, and with a task-augmented observation -- first one
        sample at a time and then with batches of three identical inputs.

        Args:
            obs_dim: Observation dimension handed to the dummy box env.
            task_num: Length of the task vector and size of the encoder's
                input space.
            latent_dim: Size of the encoder's output (latent) space.
            action_dim: Action dimension handed to the dummy box env.

        """
        env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))

        # The encoder spec declares unit boxes for both the task input and
        # the latent output.
        task_space = akro.Box(low=np.zeros(task_num), high=np.ones(task_num))
        latent_space = akro.Box(low=np.zeros(latent_dim),
                                high=np.ones(latent_dim))
        encoder = GaussianMLPEncoder(
            InOutSpec(input_space=task_space, output_space=latent_space))
        policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                encoder=encoder)

        # Take one step to obtain an observation to feed the policy.
        env.reset()
        obs, _, _, _ = env.step(1)

        latent = np.random.random((latent_dim, ))
        # One-hot task vector with the first entry set.
        one_hot_task = np.zeros(task_num)
        one_hot_task[0] = 1

        # Single-sample queries.
        from_latent, _ = policy.get_action_given_latent(obs, latent)
        from_task, _ = policy.get_action_given_task(obs, one_hot_task)
        from_aug_obs, _ = policy.get_action(
            np.concatenate([obs.flatten(), one_hot_task]))

        for sampled in (from_latent, from_task, from_aug_obs):
            assert env.action_space.contains(sampled)

        # Batched queries: each batch repeats the same input three times.
        batch_size = 3
        obs_batch = [obs] * batch_size
        latent_batch = [latent] * batch_size
        task_batch = [one_hot_task] * batch_size
        aug_obs_batch = (
            [np.concatenate([obs.flatten(), one_hot_task])] * batch_size)

        batch_from_latents, _ = policy.get_actions_given_latents(
            obs_batch, latent_batch)
        batch_from_tasks, _ = policy.get_actions_given_tasks(
            obs_batch, task_batch)
        batch_from_aug_obs, _ = policy.get_actions(aug_obs_batch)

        for action_batch in (batch_from_latents, batch_from_tasks,
                             batch_from_aug_obs):
            for sampled in action_batch:
                assert env.action_space.contains(sampled)
Exemple #2
0
    def test_get_action(self, mock_normal, obs_dim, task_num, latent_dim,
                        action_dim):
        """Check actions and distribution info against fixed expected values.

        The policy is built while its ``GaussianMLPModel`` is swapped for
        ``SimpleGaussianMLPModel``. It is then queried with an explicit
        latent, with a task vector, and with a task-augmented observation,
        both one sample at a time and in batches of three identical inputs.
        Every action must lie in the action space and equal 0.75 in every
        dimension; every reported mean must equal 0.5 and every log std
        must equal ``log(0.5)``.

        Args:
            mock_normal: Mock whose return value is pinned to 0.5 here.
                NOTE(review): presumably injected by a ``mock.patch``
                decorator on a normal sampler outside this view -- confirm.
            obs_dim: Observation dimension handed to the dummy box env.
            task_num: Length of the task vector and size of the encoder's
                input space.
            latent_dim: Size of the encoder's output (latent) space.
            action_dim: Action dimension handed to the dummy box env; also
                the length of the expected action/mean/log-std vectors.

        """
        mock_normal.return_value = 0.5
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))

        # Encoder and policy are constructed while the policy module's
        # GaussianMLPModel is replaced by the simple stand-in model.
        model_target = ('garage.tf.policies.'
                        'gaussian_mlp_task_embedding_policy.GaussianMLPModel')
        with mock.patch(model_target, new=SimpleGaussianMLPModel):
            task_space = akro.Box(low=np.zeros(task_num),
                                  high=np.ones(task_num))
            latent_space = akro.Box(low=np.zeros(latent_dim),
                                    high=np.ones(latent_dim))
            encoder = GaussianMLPEncoder(
                InOutSpec(input_space=task_space, output_space=latent_space))
            policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                                    encoder=encoder)

        # Take one step to obtain an observation to feed the policy.
        env.reset()
        obs, _, _, _ = env.step(1)

        latent = np.random.random((latent_dim, ))
        # One-hot task vector with the first entry set.
        one_hot_task = np.zeros(task_num)
        one_hot_task[0] = 1

        # Single-sample queries.
        single_results = (
            policy.get_action_given_latent(obs, latent),
            policy.get_action_given_task(obs, one_hot_task),
            policy.get_action(np.concatenate([obs.flatten(), one_hot_task])),
        )

        # NOTE(review): 0.75 is consistent with mean + std * noise
        # (0.5 + 0.5 * 0.5) if the mocked sampler supplies the noise --
        # confirm against SimpleGaussianMLPModel.
        want_action = np.full(action_dim, 0.75)
        want_mean = np.full(action_dim, 0.5)
        want_log_std = np.full(action_dim, np.log(0.5))

        def check(action, mean, log_std):
            # Shared assertions for one (action, mean, log_std) sample.
            assert env.action_space.contains(action)
            assert np.array_equal(action, want_action)
            assert np.array_equal(mean, want_mean)
            assert np.array_equal(log_std, want_log_std)

        for action, info in single_results:
            check(action, info['mean'], info['log_std'])

        # Batched queries: each batch repeats the same input three times.
        batch_size = 3
        obs_batch = [obs] * batch_size
        latent_batch = [latent] * batch_size
        task_batch = [one_hot_task] * batch_size
        aug_obs_batch = (
            [np.concatenate([obs.flatten(), one_hot_task])] * batch_size)

        batch_results = (
            policy.get_actions_given_latents(obs_batch, latent_batch),
            policy.get_actions_given_tasks(obs_batch, task_batch),
            policy.get_actions(aug_obs_batch),
        )

        # Pair each batch's actions with its per-sample mean and log std
        # before any assertion runs.
        per_sample = [
            zip(actions, infos['mean'], infos['log_std'])
            for actions, infos in batch_results
        ]
        for samples in per_sample:
            for action, mean, log_std in samples:
                check(action, mean, log_std)