def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             activation=torch.relu_,
             strides=1,
             padding=0,
             use_bias=None,
             use_bn=False,
             kernel_initializer=None,
             kernel_init_gain=1.0,
             bias_init_value=0.0):
    """A 2D ConvTranspose layer that's also responsible for activation and
    customized weight initialization. The automatic gain calculation may
    depend on the activation following the conv layer. We suggest using this
    wrapper module instead of ``nn.ConvTranspose2d`` if you care about the
    weight std after initialization.

    Args:
        in_channels (int): channels of the input image
        out_channels (int): channels of the output image
        kernel_size (int or tuple): size of the convolving kernel
        activation (torch.nn.functional): activation applied to the output
        strides (int or tuple): stride of the transposed convolution
        padding (int or tuple): zero-padding added to the input
        use_bias (bool|None): whether to use a bias term. If None, will use
            ``not use_bn``
        use_bn (bool): whether to use batch normalization
        kernel_initializer (Callable): initializer for the conv_trans layer.
            If None is provided, a ``variance_scaling_initializer`` with gain
            as ``kernel_init_gain`` will be used.
        kernel_init_gain (float): a scaling factor (gain) applied to the std
            of the kernel init distribution. It will be ignored if
            ``kernel_initializer`` is not None.
        bias_init_value (float): a constant value the bias is initialized to
    """
    super(ConvTranspose2D, self).__init__()
    if use_bias is None:
        use_bias = not use_bn
    self._activation = activation
    self._conv_trans2d = nn.ConvTranspose2d(
        in_channels,
        out_channels,
        kernel_size,
        stride=strides,
        padding=padding,
        bias=use_bias)
    if kernel_initializer is None:
        variance_scaling_init(
            self._conv_trans2d.weight.data,
            gain=kernel_init_gain,
            nonlinearity=self._activation,
            transposed=True)
    else:
        kernel_initializer(self._conv_trans2d.weight.data)
    if use_bias:
        nn.init.constant_(self._conv_trans2d.bias.data, bias_init_value)
    if use_bn:
        self._bn = nn.BatchNorm2d(out_channels)
    else:
        self._bn = None
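# A minimal usage sketch (not from this repo's docs; it assumes ``forward``
# applies the transposed conv, batch norm if enabled, and then the
# activation). The output size follows the standard transposed-conv formula
# (H - 1) * stride - 2 * padding + kernel_size:
#
#     layer = ConvTranspose2D(3, 16, kernel_size=4, strides=2, padding=1)
#     img = torch.randn(8, 3, 32, 32)
#     out = layer(img)    # expected shape: [8, 16, 64, 64]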
def __init__(self,
             input_size,
             output_size,
             n,
             activation=identity,
             use_bias=True,
             use_bn=False,
             use_ln=False,
             kernel_initializer=None,
             kernel_init_gain=1.0,
             bias_init_value=0.0):
    """Parallel FC layer.

    It is equivalent to ``n`` separate FC layers with the same
    ``input_size`` and ``output_size``.

    Args:
        input_size (int): input size
        output_size (int): output size
        n (int): number of independent ``FC`` layers
        activation (torch.nn.functional): activation applied to the output
        use_bias (bool): whether to use a bias term
        use_bn (bool): whether to use batch normalization
        use_ln (bool): whether to use layer normalization
        kernel_initializer (Callable): initializer for the FC layer kernel.
            If None is provided, a ``variance_scaling_initializer`` with gain
            as ``kernel_init_gain`` will be used.
        kernel_init_gain (float): a scaling factor (gain) applied to the std
            of the kernel init distribution. It will be ignored if
            ``kernel_initializer`` is not None.
        bias_init_value (float): a constant value the bias is initialized to
    """
    super().__init__()
    self._activation = activation
    self._weight = nn.Parameter(torch.Tensor(n, output_size, input_size))
    if use_bias:
        self._bias = nn.Parameter(torch.Tensor(n, output_size))
    else:
        self._bias = None

    for i in range(n):
        if kernel_initializer is None:
            variance_scaling_init(
                self._weight.data[i],
                gain=kernel_init_gain,
                nonlinearity=self._activation)
        else:
            kernel_initializer(self._weight.data[i])

    if use_bias:
        nn.init.constant_(self._bias.data, bias_init_value)

    if use_bn:
        self._bn = nn.BatchNorm1d(n * output_size)
    else:
        self._bn = None
    if use_ln:
        self._ln = nn.GroupNorm(n, n * output_size)
    else:
        self._ln = None
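# A minimal usage sketch (it assumes ``forward`` accepts a batched input of
# shape [B, n, input_size]; the forward implementation is not shown here).
# With ``self._weight`` of shape [n, output_size, input_size], each of the n
# layers computes ``y[:, i, :] = x[:, i, :] @ weight[i].t() + bias[i]``:
#
#     pfc = ParallelFC(input_size=64, output_size=32, n=4,
#                      activation=torch.relu_)
#     x = torch.randn(8, 4, 64)
#     y = pfc(x)    # expected shape: [8, 4, 32]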
def reset_parameters(self):
    """Reinitialize the weight, the bias, and the batch norm parameters."""
    if self._kernel_initializer is None:
        variance_scaling_init(
            self._weight.data,
            gain=self._kernel_init_gain,
            nonlinearity=self._activation)
    else:
        self._kernel_initializer(self._weight.data)
    if self._use_bias:
        nn.init.constant_(self._bias.data, self._bias_init_value)
    if self._use_bn:
        self._bn.reset_parameters()
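# A minimal usage sketch (hypothetical; it assumes the owning layer saves the
# constructor arguments on ``self`` under the attribute names referenced
# above, e.g. ``self._kernel_initializer`` and ``self._kernel_init_gain``):
#
#     layer = FC(64, 32)        # an FC layer from this module (assumed name)
#     layer.reset_parameters()  # re-draws the weights with the same config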
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             n,
             activation=torch.relu_,
             strides=1,
             padding=0,
             use_bias=None,
             use_bn=False,
             kernel_initializer=None,
             kernel_init_gain=1.0,
             bias_init_value=0.0):
    """A parallel ConvTranspose2D layer that can be used to perform ``n``
    independent 2D transposed convolutions in parallel.

    Args:
        in_channels (int): channels of the input image
        out_channels (int): channels of the output image
        kernel_size (int or tuple): size of the convolving kernel
        n (int): number of independent ``ConvTranspose2D`` layers
        activation (torch.nn.functional): activation applied to the output
        strides (int or tuple): stride of the transposed convolution
        padding (int or tuple): zero-padding added to the input
        use_bias (bool|None): whether to use a bias term. If None, will use
            ``not use_bn``
        use_bn (bool): whether to use batch normalization
        kernel_initializer (Callable): initializer for the conv_trans layer.
            If None is provided, a ``variance_scaling_initializer`` with gain
            as ``kernel_init_gain`` will be used.
        kernel_init_gain (float): a scaling factor (gain) applied to the std
            of the kernel init distribution. It will be ignored if
            ``kernel_initializer`` is not None.
        bias_init_value (float): a constant value the bias is initialized to
    """
    super(ParallelConvTranspose2D, self).__init__()
    if use_bias is None:
        use_bias = not use_bn
    self._activation = activation
    self._n = n
    self._in_channels = in_channels
    self._out_channels = out_channels
    self._kernel_size = common.tuplify2d(kernel_size)
    # A single grouped ConvTranspose2d with ``groups=n`` implements the n
    # parallel transposed convolutions.
    self._conv_trans2d = nn.ConvTranspose2d(
        in_channels * n,
        out_channels * n,
        kernel_size,
        groups=n,
        stride=strides,
        padding=padding,
        bias=use_bias)
    # Initialize each of the n groups independently so that every parallel
    # layer gets the same weight std as a single ConvTranspose2D would.
    for i in range(n):
        if kernel_initializer is None:
            variance_scaling_init(
                self._conv_trans2d.weight.data[i * in_channels:(i + 1) *
                                               in_channels],
                gain=kernel_init_gain,
                nonlinearity=self._activation)
        else:
            kernel_initializer(
                self._conv_trans2d.weight.data[i * in_channels:(i + 1) *
                                               in_channels])

    # [n*C, C', kernel_size, kernel_size]->[n, C, C', kernel_size, kernel_size]
    self._weight = self._conv_trans2d.weight.view(
        self._n, self._in_channels, self._out_channels,
        self._kernel_size[0], self._kernel_size[1])
    if use_bias:
        nn.init.constant_(self._conv_trans2d.bias.data, bias_init_value)
        # [n*C]->[n, C]
        self._bias = self._conv_trans2d.bias.view(self._n,
                                                  self._out_channels)
    else:
        self._bias = None

    if use_bn:
        self._bn = nn.BatchNorm2d(n * out_channels)
    else:
        self._bn = None
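# A minimal usage sketch (it assumes ``forward`` accepts an input of shape
# [B, n, C, H, W] and reshapes it to [B, n*C, H, W] for the grouped conv;
# the forward implementation is not shown here):
#
#     pct = ParallelConvTranspose2D(
#         in_channels=3, out_channels=16, kernel_size=4, n=5,
#         strides=2, padding=1)
#     img = torch.randn(8, 5, 3, 32, 32)
#     out = pct(img)    # expected shape: [8, 5, 16, 64, 64]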