예제 #1
0
def test_multi_headed_mlp_module(input_dim, output_dim, hidden_sizes,
                                 output_w_init_vals, n_heads):
    """Check each head of a MultiHeadedMLPModule against a closed-form value.

    The module is built without nonlinearities and with ``nn.init.ones_`` as
    the hidden weight initializer, then fed a vector of ones. Head ``i`` is
    asserted to equal a constant vector filled with
    ``input_dim * prod(hidden_sizes) * output_w_init_vals[i]``.

    Args:
        input_dim (int): Input dimension.
        output_dim (Sequence[int]): Output dimension(s). A single entry is
            repeated for every head when computing the expected outputs.
        hidden_sizes (list[int]): Size of hidden layers.
        output_w_init_vals (Sequence): Values passed one by one to
            ``_helper_make_inits`` to build the output weight initializers.
            A single entry is repeated for every head when computing the
            expected outputs.
        n_heads (int): Number of output layers.

    """
    head_inits = [_helper_make_inits(val) for val in output_w_init_vals]
    module = MultiHeadedMLPModule(n_heads=n_heads,
                                  input_dim=input_dim,
                                  output_dims=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  hidden_nonlinearity=None,
                                  hidden_w_init=nn.init.ones_,
                                  output_nonlinearities=None,
                                  output_w_inits=head_inits)
    outputs = module(torch.ones(input_dim))

    # A lone init value / output dimension is shared by all heads.
    head_vals = output_w_init_vals
    if len(head_vals) == 1:
        head_vals = list(head_vals) * n_heads
    head_dims = output_dim
    if len(head_dims) == 1:
        head_dims = list(head_dims) * n_heads

    # Part of the expected value that does not depend on the head.
    base = input_dim * torch.Tensor(hidden_sizes).prod()
    for idx, head_out in enumerate(outputs):
        expected = base * head_vals[idx]
        target = torch.full((head_dims[idx], ), expected, dtype=torch.float)
        assert torch.equal(head_out, target)
예제 #2
0
def test_invalid_settings(input_dim, output_dim, hidden_sizes, n_heads,
                          nonlinearity, w_init, b_init):
    """Verify that MultiHeadedMLPModule rejects invalid parameters.

    Constructing the module with the given arguments must raise a
    ``ValueError`` whose message matches the expected template below.

    Args:
        input_dim (int): Input dimension.
        output_dim (int): Output dimension.
        hidden_sizes (list[int]): Size of hidden layers.
        n_heads (int): Number of output layers.
        nonlinearity (callable or torch.nn.Module): Non-linear functions for
            output layers.
        w_init (list): Values passed one by one to ``_helper_make_inits`` to
            build the initializers for the weight in output layer.
        b_init (list[callable]): Initializer function for the bias in
            output layer.

    """
    expected_msg = ('should be either an integer or a collection of '
                    'length n_heads')
    with pytest.raises(ValueError, match=expected_msg):
        # Built inside the context so everything the original evaluated
        # under pytest.raises is still evaluated under it.
        weight_inits = [_helper_make_inits(val) for val in w_init]
        MultiHeadedMLPModule(n_heads=n_heads,
                             input_dim=input_dim,
                             output_dims=output_dim,
                             hidden_sizes=hidden_sizes,
                             hidden_nonlinearity=None,
                             hidden_w_init=nn.init.ones_,
                             output_nonlinearities=nonlinearity,
                             output_w_inits=weight_inits,
                             output_b_inits=b_init)
예제 #3
0
    def __init__(self,
                 env_spec,
                 image_format,
                 kernel_sizes,
                 *,
                 hidden_channels,
                 strides=1,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 paddings=0,
                 padding_mode='zeros',
                 max_pool=False,
                 pool_shape=None,
                 pool_stride=1,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False,
                 name='CategoricalCNNPolicy'):
        """Build a CNN module followed by a single-headed MLP.

        Args:
            env_spec: Environment specification. Its action space must be an
                ``akro.Discrete`` and its observation space must not be an
                ``akro.Dict``.
            image_format: Forwarded to ``CNNModule`` as ``image_format``.
            kernel_sizes: Forwarded to ``CNNModule`` as ``kernel_sizes``.
            hidden_channels: Keyword-only. Forwarded to ``CNNModule``.
            strides: Forwarded to ``CNNModule``.
            hidden_sizes: Hidden layer sizes of the MLP.
            hidden_nonlinearity: Hidden nonlinearity shared by the CNN and
                the MLP.
            hidden_w_init: Hidden weight initializer shared by the CNN and
                the MLP.
            hidden_b_init: Hidden bias initializer shared by the CNN and
                the MLP.
            paddings: Forwarded to ``CNNModule``.
            padding_mode: Forwarded to ``CNNModule``.
            max_pool: Forwarded to ``CNNModule``.
            pool_shape: Forwarded to ``CNNModule``.
            pool_stride: Forwarded to ``CNNModule``.
            output_w_init: Output weight initializer of the MLP.
            output_b_init: Output bias initializer of the MLP.
            layer_normalization: Forwarded to ``CNNModule`` only; it is not
                passed to the MLP.
            name: Passed to the parent constructor.

        Raises:
            ValueError: If the action space is not ``akro.Discrete`` or the
                observation space is an ``akro.Dict``.

        """
        if not isinstance(env_spec.action_space, akro.Discrete):
            # The message names this policy (see the default ``name``), not
            # the MLP policy it was originally copied from.
            raise ValueError('CategoricalCNNPolicy only works '
                             'with akro.Discrete action space.')
        if isinstance(env_spec.observation_space, akro.Dict):
            raise ValueError('CNN policies do not support '
                             'with akro.Dict observation spaces.')

        super().__init__(env_spec, name)

        # self._env_spec is not assigned in this method; presumably the
        # parent constructor stores it.
        self._cnn_module = CNNModule(
            InOutSpec(self._env_spec.observation_space, None),
            image_format=image_format,
            kernel_sizes=kernel_sizes,
            strides=strides,
            hidden_channels=hidden_channels,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            hidden_nonlinearity=hidden_nonlinearity,
            paddings=paddings,
            padding_mode=padding_mode,
            max_pool=max_pool,
            pool_shape=pool_shape,
            pool_stride=pool_stride,
            layer_normalization=layer_normalization)
        # The MLP consumes the flattened CNN output and has one head sized
        # to the flattened action space.
        self._mlp_module = MultiHeadedMLPModule(
            n_heads=1,
            input_dim=self._cnn_module.spec.output_space.flat_dim,
            output_dims=[self._env_spec.action_space.flat_dim],
            hidden_sizes=hidden_sizes,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            hidden_nonlinearity=hidden_nonlinearity,
            output_w_inits=output_w_init,
            output_b_inits=output_b_init)
예제 #4
0
    def __init__(
        self,
        input_dim,
        output_dim,
        hidden_sizes,
        hidden_nonlinearity,
        output_nonlinearity,
        min_std,
        max_std,
        normal_distribution_cls,
        init_std,
        std_parameterization,
        hidden_w_init,
        hidden_b_init,
        output_w_init,
        output_b_init,
        layer_normalization,
        learn_std
    ):
        """Initialize the parent and attach a two-headed MLP.

        Every argument is forwarded by keyword to the parent constructor.
        A ``MultiHeadedMLPModule`` with two heads is then stored on
        ``self.mean_log_std`` (presumably one head for the mean and one for
        the log standard deviation -- confirm against the forward pass).

        Args:
            input_dim: Input dimension of the two-headed network.
            output_dim: Passed as ``output_dims`` of the two-headed network.
            hidden_sizes: Hidden layer sizes.
            hidden_nonlinearity: Nonlinearity for the hidden layers.
            output_nonlinearity: Passed as ``output_nonlinearities``.
            min_std: Forwarded to the parent only.
            max_std: Forwarded to the parent only.
            normal_distribution_cls: Forwarded to the parent only.
            init_std: Forwarded to the parent only; the second head's bias
                initializer reads ``self._init_std`` instead.
            std_parameterization: Forwarded to the parent only.
            hidden_w_init: Initializer for hidden weights.
            hidden_b_init: Initializer for hidden biases.
            output_w_init: Passed as ``output_w_inits``.
            output_b_init: Forwarded to the parent only; the two-headed
                network uses its own per-head bias initializers.
            layer_normalization: Layer-normalization flag.
            learn_std: Forwarded to the parent only.

        """
        super().__init__(
            # network shape
            input_dim=input_dim,
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            layer_normalization=layer_normalization,
            # nonlinearities
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=output_nonlinearity,
            # initializers
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            # standard-deviation settings
            learn_std=learn_std,
            init_std=init_std,
            min_std=min_std,
            max_std=max_std,
            std_parameterization=std_parameterization,
            normal_distribution_cls=normal_distribution_cls,
        )

        def _fill_with_init_std(bias):
            # self._init_std is looked up only when the initializer runs;
            # it is not assigned in this method (presumably by the parent).
            return nn.init.constant_(bias, self._init_std.item())

        # First head: zero bias. Second head: bias filled with init std.
        head_bias_inits = [nn.init.zeros_, _fill_with_init_std]

        self.mean_log_std = MultiHeadedMLPModule(
            n_heads=2,
            input_dim=input_dim,
            output_dims=output_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearities=output_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_w_inits=output_w_init,
            output_b_inits=head_bias_inits,
            layer_normalization=layer_normalization
        )
def test_invalid_settings(input_dim, output_dim, hidden_sizes, n_heads,
                          nonlinearity, w_init, b_init):
    """Verify that MultiHeadedMLPModule raises ValueError on bad settings.

    Args:
        input_dim: Input dimension.
        output_dim: Passed as ``output_dims``.
        hidden_sizes: Size of hidden layers.
        n_heads: Number of output layers.
        nonlinearity: Passed as ``output_nonlinearities``.
        w_init: Values passed one by one to ``helper_make_inits`` to build
            the output weight initializers.
        b_init: Passed as ``output_b_inits``.

    """
    with pytest.raises(ValueError):
        # Built inside the context so everything the original evaluated
        # under pytest.raises is still evaluated under it.
        weight_inits = [helper_make_inits(val) for val in w_init]
        MultiHeadedMLPModule(n_heads=n_heads,
                             input_dim=input_dim,
                             output_dims=output_dim,
                             hidden_sizes=hidden_sizes,
                             hidden_nonlinearity=None,
                             hidden_w_init=nn.init.ones_,
                             output_nonlinearities=nonlinearity,
                             output_w_inits=weight_inits,
                             output_b_inits=b_init)
예제 #6
0
    def __init__(self,
                 input_dim,
                 output_dim,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 learn_std=True,
                 init_std=1.0,
                 min_std=1e-6,
                 max_std=None,
                 std_parameterization='exp',
                 layer_normalization=False,
                 normal_distribution_cls=Normal):
        """Initialize the parent and attach a shared two-headed MLP.

        Every argument is forwarded by keyword to the parent constructor.
        A ``MultiHeadedMLPModule`` with two heads is then built from
        attributes read off ``self`` (none of which are assigned in this
        method; presumably the parent constructor sets them) and stored on
        ``self._shared_mean_log_std_network``.

        Args:
            input_dim: Forwarded to the parent constructor.
            output_dim: Forwarded to the parent constructor.
            hidden_sizes: Forwarded to the parent constructor.
            hidden_nonlinearity: Forwarded to the parent constructor.
            hidden_w_init: Forwarded to the parent constructor.
            hidden_b_init: Forwarded to the parent constructor.
            output_nonlinearity: Forwarded to the parent constructor.
            output_w_init: Forwarded to the parent constructor.
            output_b_init: Forwarded to the parent constructor; the
                two-headed network uses its own per-head bias initializers.
            learn_std: Forwarded to the parent constructor.
            init_std: Forwarded to the parent constructor.
            min_std: Forwarded to the parent constructor.
            max_std: Forwarded to the parent constructor.
            std_parameterization: Forwarded to the parent constructor.
            layer_normalization: Forwarded to the parent constructor.
            normal_distribution_cls: Forwarded to the parent constructor.

        """
        super().__init__(
            # network shape
            input_dim=input_dim,
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            layer_normalization=layer_normalization,
            # nonlinearities
            hidden_nonlinearity=hidden_nonlinearity,
            output_nonlinearity=output_nonlinearity,
            # initializers
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            # standard-deviation settings
            learn_std=learn_std,
            init_std=init_std,
            min_std=min_std,
            max_std=max_std,
            std_parameterization=std_parameterization,
            normal_distribution_cls=normal_distribution_cls)

        def _fill_with_init_std(bias):
            # self._init_std is looked up only when the initializer runs.
            return nn.init.constant_(bias, self._init_std.item())

        # First head: zero bias. Second head: bias filled with init std.
        head_bias_inits = [nn.init.zeros_, _fill_with_init_std]

        self._shared_mean_log_std_network = MultiHeadedMLPModule(
            n_heads=2,
            input_dim=self._input_dim,
            output_dims=self._action_dim,
            hidden_sizes=self._hidden_sizes,
            hidden_nonlinearity=self._hidden_nonlinearity,
            hidden_w_init=self._hidden_w_init,
            hidden_b_init=self._hidden_b_init,
            output_nonlinearities=self._output_nonlinearity,
            output_w_inits=self._output_w_init,
            output_b_inits=head_bias_inits,
            layer_normalization=self._layer_normalization)
def test_multi_headed_mlp_module(input_dim, output_dim, hidden_sizes,
                                 output_w_init_vals):
    """Check each head of a MultiHeadedMLPModule against a closed-form value.

    The module gets one head per entry of ``output_w_init_vals``, no
    nonlinearities, and ``nn.init.ones_`` as the hidden weight initializer.
    Fed a vector of ones, head ``i`` is asserted to equal a constant vector
    filled with ``input_dim * prod(hidden_sizes) * output_w_init_vals[i]``.

    Args:
        input_dim (int): Input dimension.
        output_dim (int): Output dimension, shared by every head.
        hidden_sizes (list[int]): Size of hidden layers.
        output_w_init_vals (list): Values passed one by one to
            ``helper_make_inits`` to build the output weight initializers.

    """
    module = MultiHeadedMLPModule(n_heads=len(output_w_init_vals),
                                  input_dim=input_dim,
                                  output_dims=output_dim,
                                  hidden_sizes=hidden_sizes,
                                  hidden_nonlinearity=None,
                                  hidden_w_init=nn.init.ones_,
                                  output_nonlinearities=None,
                                  output_w_inits=list(
                                      map(helper_make_inits,
                                          output_w_init_vals)))

    # Named input_value rather than input to avoid shadowing the builtin.
    input_value = torch.ones(input_dim)
    outputs = module(input_value)

    for i, output in enumerate(outputs):
        expected = input_dim * torch.Tensor(hidden_sizes).prod()
        expected *= output_w_init_vals[i]
        assert torch.equal(output, torch.full((output_dim, ), expected))