예제 #1
0
    def test_get_action_np(self, hidden_sizes):
        """Check that get_action works when the observation is a numpy array.

        The policy is built with every hidden and output weight set to one and
        no hidden nonlinearity. The test expects each mean component to equal
        ``obs_dim * prod(hidden_sizes)`` and the variance to equal
        ``init_std ** 2``.

        Args:
            hidden_sizes: Hidden layer sizes supplied by the test
                parametrization.
        """
        init_std = 2.
        env_spec = GymEnv(DummyBoxEnv())
        obs_dim = env_spec.observation_space.flat_dim
        act_dim = env_spec.action_space.flat_dim
        obs = np.ones(obs_dim, dtype=np.float32)

        policy = GaussianMLPPolicy(
            env_spec=env_spec,
            hidden_sizes=hidden_sizes,
            init_std=init_std,
            hidden_nonlinearity=None,
            std_parameterization='exp',
            hidden_w_init=nn.init.ones_,
            output_w_init=nn.init.ones_,
        )

        # Calling the policy yields a tuple whose first element is the
        # action distribution.
        dist = policy(torch.from_numpy(obs))[0]

        hidden_product = torch.Tensor(hidden_sizes).prod().item()
        expected_mean = torch.full((act_dim, ),
                                   obs_dim * hidden_product,
                                   dtype=torch.float)
        expected_variance = init_std**2
        variance_target = torch.full((act_dim, ),
                                     expected_variance,
                                     dtype=torch.float)

        action, prob = policy.get_action(obs)

        assert np.array_equal(prob['mean'], expected_mean.numpy())
        assert dist.variance.equal(variance_target)
        assert action.shape == (act_dim, )
예제 #2
0
    def test_get_action(self, hidden_sizes):
        """Check get_action with a torch tensor observation.

        The policy is built with every hidden and output weight set to one and
        no hidden nonlinearity. The test expects each mean component to equal
        ``obs_dim * prod(hidden_sizes)`` and the variance to equal
        ``init_std ** 2``.

        Args:
            hidden_sizes: Hidden layer sizes supplied by the test
                parametrization.
        """
        init_std = 2.
        env_spec = TfEnv(DummyBoxEnv())
        obs_dim = env_spec.observation_space.flat_dim
        act_dim = env_spec.action_space.flat_dim
        obs = torch.ones(obs_dim, dtype=torch.float32)

        policy = GaussianMLPPolicy(
            env_spec=env_spec,
            hidden_sizes=hidden_sizes,
            init_std=init_std,
            hidden_nonlinearity=None,
            std_parameterization='exp',
            hidden_w_init=nn.init.ones_,
            output_w_init=nn.init.ones_,
        )

        # Here the policy call result is used directly as the distribution.
        dist = policy(obs)

        hidden_product = torch.Tensor(hidden_sizes).prod().item()
        expected_mean = torch.full((act_dim, ), obs_dim * hidden_product)
        expected_variance = init_std**2
        variance_target = torch.full((act_dim, ), expected_variance)

        action, prob = policy.get_action(obs)

        assert prob['mean'].equal(expected_mean)
        assert dist.variance.equal(variance_target)
        assert action.shape == (act_dim, )
예제 #3
0
    def test_get_action_dict_space(self):
        """Test if observations from dict obs spaces are properly flattened.

        Builds a policy for an environment whose observation and action
        spaces are requested as 'box' types on a dict-style dummy env, then
        checks that both the single-observation and the batched entry points
        return actions whose shape matches the environment's action space.
        """
        env = GymEnv(DummyDictEnv(obs_space_type='box', act_space_type='box'))
        policy = GaussianMLPPolicy(env_spec=env.spec,
                                   hidden_nonlinearity=None,
                                   hidden_sizes=(1, ),
                                   hidden_w_init=nn.init.ones_,
                                   output_w_init=nn.init.ones_)
        # reset() returns a tuple; its first element is the observation.
        obs = env.reset()[0]
        expected_shape = env.action_space.shape

        # Single-observation path.
        action, _ = policy.get_action(obs)
        assert expected_shape == action.shape

        # Batched path. The original repeated this exact call and loop twice;
        # the verbatim duplicate added no coverage and has been removed. The
        # loop variable gets its own name so it no longer rebinds `action`.
        actions, _ = policy.get_actions(np.array([obs, obs]))
        for batched_action in actions:
            assert expected_shape == batched_action.shape
예제 #4
0
def test_policy_get_action(mock_model, input_dim, output_dim, hidden_sizes):
    """Check that get_action returns the model's sample, squeezed to numpy.

    The model is replaced by a mock whose return value is a fake distribution;
    that distribution's ``rsample`` yields a fixed random tensor of shape
    ``(1, output_dim)``. The test asserts that the policy's output equals
    that tensor after detaching, converting to numpy and squeezing.

    Args:
        mock_model: Mock standing in for the policy's model (presumably
            injected by a patch decorator outside this view).
        input_dim: Flat observation dimension set on the mocked env spec.
        output_dim: Flat action dimension set on the mocked env spec.
        hidden_sizes: Not used in the body; kept for the parametrization.
    """
    env_spec = mock.MagicMock()
    env_spec.observation_space.flat_dim = input_dim
    env_spec.action_space.flat_dim = output_dim

    expected_action = torch.randn((1, output_dim))

    fake_dist = mock.MagicMock()
    fake_dist.rsample.return_value = expected_action
    mock_model.return_value = fake_dist

    policy = GaussianMLPPolicy(env_spec, mock_model)

    observation = torch.ones(input_dim)
    sample = policy.get_action(observation)

    expected_np = np.squeeze(expected_action.detach().numpy())
    assert np.array_equal(sample, expected_np)