def __init__(self,
             env_spec,
             n_agents,
             hidden_sizes=(32, 32),
             hidden_nonlinearity=torch.tanh,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
             output_nonlinearity=None,
             output_w_init=nn.init.xavier_uniform_,
             output_b_init=nn.init.zeros_,
             layer_normalization=False,
             name='CentralizedCategoricalMLPPolicy'):
    """Initialize a centralized categorical MLP policy.

    One shared MLP maps the flattened observation to
    ``action_dim * n_agents`` outputs, i.e. one categorical head per
    agent.

    Args:
        env_spec: Environment specification; its ``action_space`` must
            be ``akro.Discrete``.
        n_agents (int): Number of agents sharing this policy.
        hidden_sizes (tuple[int]): Sizes of the MLP hidden layers.
        hidden_nonlinearity (callable): Activation for hidden layers.
        hidden_w_init (callable): Initializer for hidden-layer weights.
        hidden_b_init (callable): Initializer for hidden-layer biases.
        output_nonlinearity (callable or None): Activation applied to
            the output layer; ``None`` leaves raw values.
        output_w_init (callable): Initializer for output-layer weights.
        output_b_init (callable): Initializer for output-layer biases.
        layer_normalization (bool): Whether to use layer normalization.
        name (str): Policy name.
    """
    assert isinstance(env_spec.action_space, akro.Discrete), (
        'Categorical policy only works with akro.Discrete action space.')
    self.centralized = True
    self.vectorized = True
    self.name = name
    self._n_agents = n_agents
    self._obs_dim = env_spec.observation_space.flat_dim
    self._action_dim = env_spec.action_space.n
    MLPModule.__init__(self,
                       input_dim=self._obs_dim,
                       output_dim=self._action_dim * self._n_agents,
                       hidden_sizes=hidden_sizes,
                       hidden_nonlinearity=hidden_nonlinearity,
                       hidden_w_init=hidden_w_init,
                       hidden_b_init=hidden_b_init,
                       output_nonlinearity=output_nonlinearity,
                       output_w_init=output_w_init,
                       output_b_init=output_b_init,
                       layer_normalization=layer_normalization)
def __init__(self, env_spec, name="CategoricalMLPPolicy", **kwargs):
    """Initialize the categorical MLP policy.

    Args:
        env_spec: Specification exposing ``input_space`` and
            ``output_space`` (flat dims are used as MLP in/out sizes).
        name (str): Policy name.
        **kwargs: Extra keyword arguments forwarded to ``MLPModule``.
    """
    self._obs_dim = env_spec.input_space.flat_dim
    self._action_dim = env_spec.output_space.flat_dim
    Policy.__init__(self, env_spec, name)
    MLPModule.__init__(self,
                       input_dim=self._obs_dim,
                       output_dim=self._action_dim,
                       **kwargs)
def test_dueling_output_values(output_dim, kernel_sizes, hidden_channels,
                               strides, paddings):
    """Dueling module must match a hand-assembled CNN + value/advantage heads."""
    batch_size = 64
    input_width = 32
    input_height = 32
    in_channel = 3
    input_shape = (batch_size, in_channel, input_height, input_width)
    obs = torch.rand(input_shape)
    module = DiscreteDuelingCNNModule(input_shape=input_shape,
                                      output_dim=output_dim,
                                      hidden_channels=hidden_channels,
                                      hidden_sizes=hidden_channels,
                                      kernel_sizes=kernel_sizes,
                                      strides=strides,
                                      paddings=paddings,
                                      padding_mode='zeros',
                                      hidden_w_init=nn.init.ones_,
                                      output_w_init=nn.init.ones_,
                                      is_image=False)
    cnn = CNNModule(input_var=obs,
                    hidden_channels=hidden_channels,
                    kernel_sizes=kernel_sizes,
                    strides=strides,
                    paddings=paddings,
                    padding_mode='zeros',
                    hidden_w_init=nn.init.ones_,
                    is_image=False)
    flat_dim = torch.flatten(cnn(obs).detach(), start_dim=1).shape[1]
    # Advantage head: one output per action; value head: a single scalar.
    mlp_adv = MLPModule(flat_dim,
                        output_dim,
                        hidden_channels,
                        hidden_w_init=nn.init.ones_,
                        output_w_init=nn.init.ones_)
    mlp_val = MLPModule(flat_dim,
                        1,
                        hidden_channels,
                        hidden_w_init=nn.init.ones_,
                        output_w_init=nn.init.ones_)
    flat_features = torch.flatten(cnn(obs), start_dim=1)
    val = mlp_val(flat_features)
    adv = mlp_adv(flat_features)
    # Dueling combination: Q = V + (A - mean(A)).
    output = val + (adv - adv.mean(1).unsqueeze(1))
    assert torch.all(torch.eq(output.detach(), module(obs).detach()))
def test_is_pickleable(self, input_dim, output_dim, hidden_sizes):
    """Check MLPModule is pickleable and outputs survive a round-trip.

    Args:
        input_dim (int): Input dimension.
        output_dim (int): Output dimension.
        hidden_sizes (list[int]): Size of hidden layers.
    """
    input_val = torch.ones([1, input_dim], dtype=torch.float32)
    module = MLPModule(input_dim=input_dim,
                       output_dim=output_dim,
                       hidden_nonlinearity=torch.relu,
                       hidden_sizes=hidden_sizes,
                       hidden_w_init=nn.init.ones_,
                       output_w_init=nn.init.ones_,
                       output_nonlinearity=torch.nn.ReLU)
    output1 = module(input_val)

    h = pickle.dumps(module)
    model_pickled = pickle.loads(h)
    output2 = model_pickled(input_val)

    # torch.equal checks shape and all values directly; the original
    # np.array_equal(torch.all(torch.eq(...)), True) took the same
    # decision via an unnecessary numpy detour.
    assert torch.equal(output1, output2)
def __init__(self, env_spec, **kwargs):
    """Initialize the continuous Q-function MLP.

    Args:
        env_spec (garage.envs.env_spec.EnvSpec): Environment
            specification.
        **kwargs: Keyword arguments forwarded to ``MLPModule`` (e.g.
            ``hidden_sizes``, ``hidden_nonlinearity``).

    Note:
        The original docstring documented an ``nn_module`` parameter
        that does not exist in this signature; it has been removed.
    """
    self._env_spec = env_spec
    self._obs_dim = env_spec.observation_space.flat_dim
    self._action_dim = env_spec.action_space.flat_dim

    # Q(s, a): the MLP consumes a concatenated observation-action
    # vector and produces a single scalar value.
    MLPModule.__init__(self,
                       input_dim=self._obs_dim + self._action_dim,
                       output_dim=1,
                       **kwargs)
def __init__(self,
             input_shape,
             output_dim,
             kernel_sizes,
             hidden_channels,
             strides,
             hidden_sizes=(32, 32),
             cnn_hidden_nonlinearity=nn.ReLU,
             mlp_hidden_nonlinearity=nn.ReLU,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
             paddings=0,
             padding_mode='zeros',
             max_pool=False,
             pool_shape=None,
             pool_stride=1,
             output_nonlinearity=None,
             output_w_init=nn.init.xavier_uniform_,
             output_b_init=nn.init.zeros_,
             layer_normalization=False,
             is_image=True):
    """Initialize a CNN-plus-MLP module for discrete outputs.

    A CNN encoder is built first; its flattened output size (probed
    with a dummy zero tensor) determines the MLP head's input width.
    """
    super().__init__()

    # Dummy forward pass to discover the CNN's flattened feature size.
    input_var = torch.zeros(input_shape)
    cnn_module = CNNModule(input_var=input_var,
                           kernel_sizes=kernel_sizes,
                           strides=strides,
                           hidden_w_init=hidden_w_init,
                           hidden_b_init=hidden_b_init,
                           hidden_channels=hidden_channels,
                           hidden_nonlinearity=cnn_hidden_nonlinearity,
                           paddings=paddings,
                           padding_mode=padding_mode,
                           max_pool=max_pool,
                           layer_normalization=layer_normalization,
                           pool_shape=pool_shape,
                           pool_stride=pool_stride,
                           is_image=is_image)
    with torch.no_grad():
        cnn_out = cnn_module(input_var)
        flat_dim = torch.flatten(cnn_out, start_dim=1).shape[1]

    mlp_module = MLPModule(flat_dim,
                           output_dim,
                           hidden_sizes,
                           hidden_nonlinearity=mlp_hidden_nonlinearity,
                           hidden_w_init=hidden_w_init,
                           hidden_b_init=hidden_b_init,
                           output_nonlinearity=output_nonlinearity,
                           output_w_init=output_w_init,
                           output_b_init=output_b_init,
                           layer_normalization=layer_normalization)

    # Insert an extra activation between CNN and MLP only when one is
    # configured for the MLP's hidden layers.
    if mlp_hidden_nonlinearity is None:
        self._module = nn.Sequential(cnn_module, nn.Flatten(), mlp_module)
    else:
        self._module = nn.Sequential(cnn_module, mlp_hidden_nonlinearity(),
                                     nn.Flatten(), mlp_module)
def __init__(self, env_spec, **kwargs):
    """Initialize class with multiple attributes.

    Args:
        env_spec (EnvSpec): Environment specification.
        **kwargs: Keyword arguments forwarded to ``MLPModule``.
    """
    self._env_spec = env_spec
    self._obs_dim = env_spec.observation_space.flat_dim
    self._action_dim = env_spec.action_space.flat_dim

    # The MLP maps concatenated (observation, action) to one Q-value.
    MLPModule.__init__(self,
                       input_dim=self._obs_dim + self._action_dim,
                       output_dim=1,
                       **kwargs)
def __init__(self,
             spec,
             image_format,
             *,
             kernel_sizes,
             hidden_channels,
             strides,
             hidden_sizes=(32, 32),
             cnn_hidden_nonlinearity=nn.ReLU,
             mlp_hidden_nonlinearity=nn.ReLU,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
             paddings=0,
             padding_mode='zeros',
             max_pool=False,
             pool_shape=None,
             pool_stride=1,
             output_nonlinearity=None,
             output_w_init=nn.init.xavier_uniform_,
             output_b_init=nn.init.zeros_,
             layer_normalization=False):
    """Initialize a CNN encoder followed by an MLP head.

    The CNN is built from the input half of ``spec`` (output space
    ``None`` lets the CNN infer its own output space); the MLP head is
    sized from the CNN's inferred flat output and ``spec``'s output
    space.
    """
    super().__init__()

    cnn_spec = InOutSpec(input_space=spec.input_space, output_space=None)
    cnn_module = CNNModule(spec=cnn_spec,
                           image_format=image_format,
                           kernel_sizes=kernel_sizes,
                           strides=strides,
                           hidden_w_init=hidden_w_init,
                           hidden_b_init=hidden_b_init,
                           hidden_channels=hidden_channels,
                           hidden_nonlinearity=cnn_hidden_nonlinearity,
                           paddings=paddings,
                           padding_mode=padding_mode,
                           max_pool=max_pool,
                           layer_normalization=layer_normalization,
                           pool_shape=pool_shape,
                           pool_stride=pool_stride)

    flat_dim = cnn_module.spec.output_space.flat_dim
    output_dim = spec.output_space.flat_dim
    mlp_module = MLPModule(flat_dim,
                           output_dim,
                           hidden_sizes,
                           hidden_nonlinearity=mlp_hidden_nonlinearity,
                           hidden_w_init=hidden_w_init,
                           hidden_b_init=hidden_b_init,
                           output_nonlinearity=output_nonlinearity,
                           output_w_init=output_w_init,
                           output_b_init=output_b_init,
                           layer_normalization=layer_normalization)

    # Bridge CNN and MLP with an extra activation only when the MLP has
    # a hidden nonlinearity configured.
    if mlp_hidden_nonlinearity is None:
        self._module = nn.Sequential(cnn_module, nn.Flatten(), mlp_module)
    else:
        self._module = nn.Sequential(cnn_module, mlp_hidden_nonlinearity(),
                                     nn.Flatten(), mlp_module)
def __init__(self, env_spec, name='DeterministicMLPPolicy', **kwargs):
    """Initialize class with multiple attributes.

    Args:
        env_spec (garage.envs.env_spec.EnvSpec): Environment
            specification.
        name (str): Policy name.
        **kwargs: Additional keyword arguments passed to the MLPModule.
    """
    self._obs_dim = env_spec.observation_space.flat_dim
    self._action_dim = env_spec.action_space.flat_dim

    Policy.__init__(self, env_spec, name)
    MLPModule.__init__(self,
                       input_dim=self._obs_dim,
                       output_dim=self._action_dim,
                       **kwargs)
def test_mlp_with_learnable_non_linear_function(self):
    """Test MLPModule with learnable non-linear functions."""
    input_dim, output_dim, hidden_sizes = 1, 1, (3, 2)

    input_val = -torch.ones([1, input_dim], dtype=torch.float32)
    module = MLPModule(input_dim=input_dim,
                       output_dim=output_dim,
                       hidden_nonlinearity=torch.nn.PReLU(init=10.),
                       hidden_sizes=hidden_sizes,
                       hidden_w_init=nn.init.ones_,
                       output_w_init=nn.init.ones_,
                       output_nonlinearity=torch.nn.PReLU(init=1.))

    output = module(input_val)
    output.sum().backward()

    # Every parameter — including the PReLU slopes — must receive a
    # nonzero gradient.
    for param in module.parameters():
        assert torch.all(torch.ne(param.grad, 0))
def test_output_values(self, input_dim, output_dim, hidden_sizes):
    """Test output values from MLPModule.

    Args:
        input_dim (int): Input dimension.
        output_dim (int): Output dimension.
        hidden_sizes (list[int]): Size of hidden layers.
    """
    input_val = torch.ones([1, input_dim], dtype=torch.float32)

    # Same architecture specified two ways: nonlinearity as a plain
    # function + module class, and as a module instance + function.
    mlp_fn_and_module = MLPModule(input_dim=input_dim,
                                  output_dim=output_dim,
                                  hidden_nonlinearity=torch.relu,
                                  hidden_sizes=hidden_sizes,
                                  hidden_w_init=nn.init.ones_,
                                  output_w_init=nn.init.ones_,
                                  output_nonlinearity=torch.nn.ReLU)
    mlp_instance_and_fn = MLPModule(input_dim=input_dim,
                                    output_dim=output_dim,
                                    hidden_nonlinearity=torch.nn.ReLU(),
                                    hidden_sizes=hidden_sizes,
                                    hidden_w_init=nn.init.ones_,
                                    output_w_init=nn.init.ones_,
                                    output_nonlinearity=torch.relu)

    output1 = mlp_fn_and_module(input_val)
    output2 = mlp_instance_and_fn(input_val)

    # With all-ones weights and all-ones input, each layer multiplies
    # the running value by its width; expected = 5 * prod(hidden_sizes)
    # comes from the parametrized input_dim.
    expected_output = torch.full([1, output_dim],
                                 fill_value=5 * np.prod(hidden_sizes),
                                 dtype=torch.float32)

    assert torch.all(torch.eq(expected_output, output1))
    assert torch.all(torch.eq(expected_output, output2))
def test_is_pickleable(self, input_dim, output_dim, hidden_sizes):
    """Check a linear MLPModule survives a pickle round-trip unchanged.

    Args:
        input_dim (int): Input dimension.
        output_dim (int): Output dimension.
        hidden_sizes (list[int]): Size of hidden layers.
    """
    input_val = torch.ones([1, 5], dtype=torch.float32)
    module = MLPModule(input_dim=input_dim,
                       output_dim=output_dim,
                       hidden_nonlinearity=None,
                       hidden_sizes=hidden_sizes,
                       hidden_w_init=nn.init.ones_,
                       output_w_init=nn.init.ones_)
    output1 = module(input_val)

    h = pickle.dumps(module)
    model_pickled = pickle.loads(h)
    output2 = model_pickled(input_val)

    # torch.equal compares shape and values in one call; the original
    # np.array_equal(torch.all(torch.eq(...)), True) reached the same
    # verdict through an unnecessary numpy conversion.
    assert torch.equal(output1, output2)
def test_output_values(self, input_dim, output_dim, hidden_sizes):
    """Check forward values of a linear (no-nonlinearity) MLPModule."""
    input_val = torch.ones([1, 5], dtype=torch.float32)
    module = MLPModule(input_dim=input_dim,
                       output_dim=output_dim,
                       hidden_nonlinearity=None,
                       hidden_sizes=hidden_sizes,
                       hidden_w_init=nn.init.ones_,
                       output_w_init=nn.init.ones_)
    output = module(input_val)

    # All-ones weights and all-ones input: each layer sums its inputs,
    # so the result is 5 (input width) times the product of layer sizes.
    expected_output = torch.full([1, output_dim],
                                 fill_value=5 * np.prod(hidden_sizes),
                                 dtype=torch.float32)
    self.assertEqual(torch.all(torch.eq(output, expected_output)), True)
def test_output_shape(self, obs_dim, act_dim, output_dim, hidden_sizes):
    """Q-function output should be a single (1, 1) value per input pair."""
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
    act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)

    nn_module = MLPModule(input_dim=obs_dim + act_dim,
                          output_dim=output_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)
    qf = ContinuousNNQFunction(env_spec, nn_module)
    output = qf.get_qval(obs, act)

    assert output.shape == (1, 1)
def test_get_actions(self, obs_dim, act_dim, batch_size, hidden_sizes):
    """Batched get_actions must return the analytic all-ones-MLP values."""
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
    nn_module = MLPModule(input_dim=obs_dim,
                          output_dim=act_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)
    policy = DeterministicPolicy(env_spec, nn_module)

    # All-ones weights and inputs: every action equals
    # obs_dim * prod(hidden_sizes).
    expected_output = np.full([batch_size, act_dim],
                              fill_value=obs_dim * np.prod(hidden_sizes),
                              dtype=np.float32)
    assert np.array_equal(policy.get_actions(obs), expected_output)
def test_output_values(output_dim, kernel_sizes, hidden_channels, strides,
                       paddings):
    """DiscreteCNNModule must equal a manually composed CNN + MLP."""
    input_width = 32
    input_height = 32
    in_channel = 3
    input_shape = (in_channel, input_height, input_width)
    spec = InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf),
        akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf))
    obs = torch.rand(input_shape)

    module = DiscreteCNNModule(spec=spec,
                               image_format='NCHW',
                               hidden_channels=hidden_channels,
                               hidden_sizes=hidden_channels,
                               kernel_sizes=kernel_sizes,
                               strides=strides,
                               paddings=paddings,
                               padding_mode='zeros',
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    # Reference pipeline: identical CNN (output space inferred) feeding
    # an identically initialized MLP head.
    cnn = CNNModule(spec=InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf), None),
                    image_format='NCHW',
                    hidden_channels=hidden_channels,
                    kernel_sizes=kernel_sizes,
                    strides=strides,
                    paddings=paddings,
                    padding_mode='zeros',
                    hidden_w_init=nn.init.ones_)
    flat_dim = torch.flatten(cnn(obs).detach(), start_dim=1).shape[1]
    mlp = MLPModule(flat_dim,
                    output_dim,
                    hidden_channels,
                    hidden_w_init=nn.init.ones_,
                    output_w_init=nn.init.ones_)

    output = mlp(torch.flatten(cnn(obs), start_dim=1))
    assert torch.all(torch.eq(output.detach(), module(obs).detach()))
def __init__(self,
             input_dim,
             output_dim,
             hidden_sizes=(32, 32),
             hidden_nonlinearity=torch.tanh,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
             output_nonlinearity=None,
             output_w_init=nn.init.xavier_uniform_,
             output_b_init=nn.init.zeros_,
             learn_std=True,
             init_std=1.0,
             min_std=1e-6,
             max_std=None,
             std_parameterization='exp',
             layer_normalization=False,
             normal_distribution_cls=Normal):
    """Initialize a Gaussian MLP whose mean is produced by an MLPModule.

    All configuration is forwarded to the base class; a dedicated mean
    network is then built from the attributes the base class stored.
    """
    super().__init__(input_dim=input_dim,
                     output_dim=output_dim,
                     hidden_sizes=hidden_sizes,
                     hidden_nonlinearity=hidden_nonlinearity,
                     hidden_w_init=hidden_w_init,
                     hidden_b_init=hidden_b_init,
                     output_nonlinearity=output_nonlinearity,
                     output_w_init=output_w_init,
                     output_b_init=output_b_init,
                     learn_std=learn_std,
                     init_std=init_std,
                     min_std=min_std,
                     max_std=max_std,
                     std_parameterization=std_parameterization,
                     layer_normalization=layer_normalization,
                     normal_distribution_cls=normal_distribution_cls)

    # Mean network: configured entirely from attributes set by the base
    # class so both stay consistent.
    self._mean_module = MLPModule(
        input_dim=self._input_dim,
        output_dim=self._action_dim,
        hidden_sizes=self._hidden_sizes,
        hidden_nonlinearity=self._hidden_nonlinearity,
        hidden_w_init=self._hidden_w_init,
        hidden_b_init=self._hidden_b_init,
        output_nonlinearity=self._output_nonlinearity,
        output_w_init=self._output_w_init,
        output_b_init=self._output_b_init,
        layer_normalization=self._layer_normalization)
def test_no_head_invalid_settings(self, hidden_nonlinear, output_nonlinear):
    """Check MLPModule throws exception with invalid non-linear functions.

    Args:
        hidden_nonlinear (callable or torch.nn.Module): Non-linear
            functions for hidden layers.
        output_nonlinear (callable or torch.nn.Module): Non-linear
            functions for output layer.
    """
    expected_msg = 'Non linear function .* is not supported'
    with pytest.raises(ValueError, match=expected_msg):
        MLPModule(input_dim=3,
                  output_dim=5,
                  hidden_sizes=(2, 3),
                  hidden_nonlinearity=hidden_nonlinear,
                  output_nonlinearity=output_nonlinear)
def test_is_pickleable(self, obs_dim, act_dim, batch_size, hidden_sizes):
    """A pickled policy must produce the same actions as the original."""
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
    nn_module = MLPModule(input_dim=obs_dim,
                          output_dim=act_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)
    policy = DeterministicPolicy(env_spec, nn_module)
    output1 = policy.get_actions(obs)

    p = pickle.dumps(policy)
    policy_pickled = pickle.loads(p)
    output2 = policy_pickled.get_actions(obs)

    assert np.array_equal(output1, output2)
def test_get_qval(self, obs_dim, act_dim, output_dim, hidden_sizes):
    """Check get_qval against the analytic all-ones-MLP result.

    Args:
        obs_dim (int): Observation dimension.
        act_dim (int): Action dimension.
        output_dim (int): Q-function output dimension.
        hidden_sizes (list[int]): Size of hidden layers.
    """
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
    act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)
    nn_module = MLPModule(input_dim=obs_dim + act_dim,
                          output_dim=output_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)
    qf = ContinuousNNQFunction(env_spec, nn_module)
    output = qf.get_qval(obs, act)

    expected_output = torch.full([1, output_dim],
                                 fill_value=(obs_dim + act_dim) *
                                 np.prod(hidden_sizes),
                                 dtype=torch.float32)
    # `assert torch.eq(a, b)` raises "Boolean value of Tensor ... is
    # ambiguous" whenever output_dim > 1; reduce elementwise equality
    # with torch.all so the assertion is valid for every output_dim.
    assert torch.all(torch.eq(output, expected_output))
def test_is_pickleable(self, obs_dim, act_dim, output_dim, hidden_sizes):
    """Check the Q-function survives a pickle round-trip unchanged.

    Args:
        obs_dim (int): Observation dimension.
        act_dim (int): Action dimension.
        output_dim (int): Q-function output dimension.
        hidden_sizes (list[int]): Size of hidden layers.
    """
    env_spec = TfEnv(DummyBoxEnv())
    obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
    act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)
    nn_module = MLPModule(input_dim=obs_dim + act_dim,
                          output_dim=output_dim,
                          hidden_nonlinearity=None,
                          hidden_sizes=hidden_sizes,
                          hidden_w_init=nn.init.ones_,
                          output_w_init=nn.init.ones_)
    qf = ContinuousNNQFunction(env_spec, nn_module)
    output1 = qf.get_qval(obs, act)

    p = pickle.dumps(qf)
    qf_pickled = pickle.loads(p)
    output2 = qf_pickled.get_qval(obs, act)

    # `assert torch.eq(a, b)` is ambiguous for tensors with more than
    # one element (output_dim > 1); torch.equal compares shape and all
    # values and always yields a plain bool.
    assert torch.equal(output1, output2)
def __init__(
    self,
    K,
    Dx,
    mlp_input_dim=None,
    hidden_layer_sizes=(124, 124),
    reg=0.001,
    reparameterize=True,
):
    """Initialize a K-component mixture parameter module.

    Args:
        K (int): Number of mixture components.
        Dx (int): Dimensionality of each component's output variable.
        mlp_input_dim (int or None): Input dimension of the conditioning
            MLP; when None, no MLP is built and a fixed Normal(0, 0.1)
            distribution is stored instead.
        hidden_layer_sizes (tuple[int]): Hidden layer sizes of the MLP.
        reg (float): Regularization coefficient.
        reparameterize (bool): Whether to use the reparameterization
            trick.
    """
    self._reg = reg
    self._reparameterize = reparameterize
    self._Dx = Dx
    self._K = K

    # PEP 8: compare to None with `is`, not `==` (the original used
    # `mlp_input_dim == None`, which invokes __eq__ and can misbehave
    # for objects overriding equality).
    if mlp_input_dim is None:
        self._w_and_mu_logsig_t = torch.distributions.normal.Normal(0, 0.1)
        self._use_mlp = False
    else:
        # Per component: Dx means + Dx log-sigmas + 1 weight logit,
        # hence K * (2 * Dx + 1) outputs.
        self._w_and_mu_logsig_t = MLPModule(
            input_dim=mlp_input_dim,
            output_dim=K * (2 * Dx + 1),
            hidden_sizes=hidden_layer_sizes)
        self._use_mlp = True
def __init__(self,
             input_dim,
             output_dim,
             hidden_sizes=(64, 64),
             hidden_nonlinearity=torch.tanh,
             hidden_w_init=nn.init.xavier_uniform_,
             hidden_b_init=nn.init.zeros_,
             output_w_init=nn.init.xavier_uniform_,
             output_b_init=nn.init.zeros_,
             layer_normalization=False):
    """Initialize a categorical-logits MLP module.

    The output layer deliberately has no nonlinearity: the raw logits
    are needed downstream for the straight-through Gumbel-softmax
    estimator.
    """
    super().__init__()
    self._input_dim = input_dim
    self._action_dim = output_dim
    self._hidden_sizes = hidden_sizes
    self._hidden_nonlinearity = hidden_nonlinearity
    self._hidden_w_init = hidden_w_init
    self._hidden_b_init = hidden_b_init
    self._output_w_init = output_w_init
    self._output_b_init = output_b_init
    self._layer_normalization = layer_normalization
    # Raw predictions required for the St Gumbel-softmax estimator.
    self._output_nonlinearity = None

    self.categorical_logits_module = MLPModule(
        input_dim=self._input_dim,
        output_dim=self._action_dim,
        hidden_sizes=self._hidden_sizes,
        hidden_nonlinearity=self._hidden_nonlinearity,
        hidden_w_init=self._hidden_w_init,
        hidden_b_init=self._hidden_b_init,
        output_nonlinearity=None,
        output_w_init=self._output_w_init,
        output_b_init=self._output_b_init,
        layer_normalization=self._layer_normalization)