def test_get_action_np(self, hidden_sizes):
    """Test get_action function with numpy inputs."""
    env_spec = GymEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    act_dim = env_spec.action_space.flat_dim
    obs = np.ones(obs_dim, dtype=np.float32)
    init_std = 2.

    policy = GaussianMLPPolicy(env_spec=env_spec,
                               hidden_sizes=hidden_sizes,
                               init_std=init_std,
                               hidden_nonlinearity=None,
                               std_parameterization='exp',
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    # forward() returns a (distribution, info) tuple.
    dist = policy(torch.from_numpy(obs))[0]

    # With all-ones weights and no nonlinearity, each layer multiplies its
    # input sum through, so the mean is obs_dim * prod(hidden_sizes).
    expected_mean = torch.full(
        (act_dim, ),
        obs_dim * (torch.Tensor(hidden_sizes).prod().item()),
        dtype=torch.float)
    expected_variance = init_std**2
    action, prob = policy.get_action(obs)

    assert np.array_equal(prob['mean'], expected_mean.numpy())
    assert dist.variance.equal(
        torch.full((act_dim, ), expected_variance, dtype=torch.float))
    assert action.shape == (act_dim, )
def test_get_action(self, hidden_sizes):
    """Test get_action function with torch tensor inputs."""
    env_spec = GymEnv(DummyBoxEnv())
    obs_dim = env_spec.observation_space.flat_dim
    act_dim = env_spec.action_space.flat_dim
    obs = torch.ones(obs_dim, dtype=torch.float32)
    init_std = 2.

    policy = GaussianMLPPolicy(env_spec=env_spec,
                               hidden_sizes=hidden_sizes,
                               init_std=init_std,
                               hidden_nonlinearity=None,
                               std_parameterization='exp',
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    # forward() returns a (distribution, info) tuple, as in
    # test_get_action_np above.
    dist = policy(obs)[0]

    expected_mean = torch.full(
        (act_dim, ),
        obs_dim * (torch.Tensor(hidden_sizes).prod().item()),
        dtype=torch.float)
    expected_variance = init_std**2
    action, prob = policy.get_action(obs)

    assert np.array_equal(prob['mean'], expected_mean.numpy())
    assert dist.variance.equal(
        torch.full((act_dim, ), expected_variance, dtype=torch.float))
    assert action.shape == (act_dim, )
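# The `hidden_sizes` argument in the two tests above is assumed to be supplied
# by a pytest parametrization not shown in this excerpt. A minimal sketch of
# what that decorator presumably looks like (the size tuples are illustrative,
# not taken from the original file):
#
# @pytest.mark.parametrize('hidden_sizes',
#                          [(1, ), (2, ), (3, ), (1, 4), (3, 5)])
# def test_get_action(self, hidden_sizes):
#     ...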
def test_get_action_dict_space(self):
    """Test if observations from dict obs spaces are properly flattened."""
    env = GymEnv(DummyDictEnv(obs_space_type='box', act_space_type='box'))
    policy = GaussianMLPPolicy(env_spec=env.spec,
                               hidden_nonlinearity=None,
                               hidden_sizes=(1, ),
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)
    obs = env.reset()[0]

    action, _ = policy.get_action(obs)
    assert env.action_space.shape == action.shape

    actions, _ = policy.get_actions(np.array([obs, obs]))
    for action in actions:
        assert env.action_space.shape == action.shape
def test_policy_get_action(mock_model, input_dim, output_dim, hidden_sizes):
    """Test that get_action returns the sample drawn from the mocked module."""
    action = torch.randn((1, output_dim))
    mock_dist = mock.MagicMock()
    mock_dist.rsample.return_value = action
    mock_model.return_value = mock_dist

    env_spec = mock.MagicMock()
    env_spec.observation_space.flat_dim = input_dim
    env_spec.action_space.flat_dim = output_dim

    policy = GaussianMLPPolicy(env_spec, mock_model)
    obs = torch.ones(input_dim)  # renamed from `input`, which shadows a builtin
    # get_action returns an (action, agent_info) tuple, consistent with the
    # other tests in this file.
    sample, _ = policy.get_action(obs)

    assert np.array_equal(sample, np.squeeze(action.detach().numpy()))
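# `mock_model`, `input_dim`, `output_dim`, and `hidden_sizes` above are assumed
# to be injected by decorators not shown in this excerpt: a pytest
# parametrization plus a mock.patch of the policy's underlying Gaussian module.
# A hedged sketch of that setup (the patch target and parameter values are
# illustrative assumptions, not confirmed from this file):
#
# @pytest.mark.parametrize('input_dim, output_dim, hidden_sizes',
#                          [(1, 1, (1, )), (5, 3, (2, 4))])
# @mock.patch('garage.torch.modules.GaussianMLPModule')
# def test_policy_get_action(mock_model, input_dim, output_dim, hidden_sizes):
#     ...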