def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             activation=torch.relu_,
             strides=1,
             padding=0,
             use_bias=None,
             use_bn=False,
             kernel_initializer=None,
             kernel_init_gain=1.0,
             bias_init_value=0.0):
    """A 2D ConvTranspose layer that's also responsible for activation and
    customized weight initialization. The automatic gain calculation may
    depend on the activation following the conv layer. We suggest using this
    wrapper module instead of ``nn.ConvTranspose2d`` if you care about the
    weight std after initialization.

    Args:
        in_channels (int): channels of the input image
        out_channels (int): channels of the output image
        kernel_size (int or tuple): size of the convolving kernel
        activation (torch.nn.functional): activation applied to the output
        strides (int or tuple): stride of the transposed convolution
        padding (int or tuple): zero-padding added to the input
        use_bias (bool|None): whether to use a bias term. If None, will use
            ``not use_bn``
        use_bn (bool): whether to use batch normalization
        kernel_initializer (Callable): initializer for the conv_trans layer.
            If None is provided, a ``variance_scaling_initializer`` with gain
            as ``kernel_init_gain`` will be used.
        kernel_init_gain (float): a scaling factor (gain) applied to the std
            of the kernel init distribution. It will be ignored if
            ``kernel_initializer`` is not None.
        bias_init_value (float): a constant value the bias is initialized to
    """
    super(ConvTranspose2D, self).__init__()
    if use_bias is None:
        use_bias = not use_bn
    self._activation = activation
    self._conv_trans2d = nn.ConvTranspose2d(
        in_channels,
        out_channels,
        kernel_size,
        stride=strides,
        padding=padding,
        bias=use_bias)
    if kernel_initializer is None:
        variance_scaling_init(
            self._conv_trans2d.weight.data,
            gain=kernel_init_gain,
            nonlinearity=self._activation,
            transposed=True)
    else:
        kernel_initializer(self._conv_trans2d.weight.data)
    if use_bias:
        nn.init.constant_(self._conv_trans2d.bias.data, bias_init_value)
    if use_bn:
        self._bn = nn.BatchNorm2d(out_channels)
    else:
        self._bn = None
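# A minimal usage sketch (not from this repo's docs; it assumes ``forward``
# applies the transposed conv, batch norm if enabled, and then the
# activation). The output size follows the standard transposed-conv formula
# (H - 1) * stride - 2 * padding + kernel_size:
#
#     layer = ConvTranspose2D(3, 16, kernel_size=4, strides=2, padding=1)
#     img = torch.randn(8, 3, 32, 32)
#     out = layer(img)    # expected shape: [8, 16, 64, 64]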
def __init__(self,
             input_size,
             output_size,
             n,
             activation=identity,
             use_bias=True,
             use_bn=False,
             use_ln=False,
             kernel_initializer=None,
             kernel_init_gain=1.0,
             bias_init_value=0.0):
    """Parallel FC layer.

    It is equivalent to ``n`` separate FC layers with the same
    ``input_size`` and ``output_size``.

    Args:
        input_size (int): input size
        output_size (int): output size
        n (int): number of independent ``FC`` layers
        activation (torch.nn.functional): activation applied to the output
        use_bias (bool): whether to use a bias term
        use_bn (bool): whether to use batch normalization
        use_ln (bool): whether to use layer normalization
        kernel_initializer (Callable): initializer for the FC layer kernel.
            If None is provided, a ``variance_scaling_initializer`` with gain
            as ``kernel_init_gain`` will be used.
        kernel_init_gain (float): a scaling factor (gain) applied to the std
            of the kernel init distribution. It will be ignored if
            ``kernel_initializer`` is not None.
        bias_init_value (float): a constant value the bias is initialized to
    """
    super().__init__()
    self._activation = activation
    self._weight = nn.Parameter(torch.Tensor(n, output_size, input_size))
    if use_bias:
        self._bias = nn.Parameter(torch.Tensor(n, output_size))
    else:
        self._bias = None

    for i in range(n):
        if kernel_initializer is None:
            variance_scaling_init(
                self._weight.data[i],
                gain=kernel_init_gain,
                nonlinearity=self._activation)
        else:
            kernel_initializer(self._weight.data[i])

    if use_bias:
        nn.init.constant_(self._bias.data, bias_init_value)

    if use_bn:
        self._bn = nn.BatchNorm1d(n * output_size)
    else:
        self._bn = None
    if use_ln:
        self._ln = nn.GroupNorm(n, n * output_size)
    else:
        self._ln = None
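# A minimal usage sketch (it assumes ``forward`` accepts a batched input of
# shape [B, n, input_size]; the forward implementation is not shown here).
# With ``self._weight`` of shape [n, output_size, input_size], each of the n
# layers computes ``y[:, i, :] = x[:, i, :] @ weight[i].t() + bias[i]``:
#
#     pfc = ParallelFC(input_size=64, output_size=32, n=4,
#                      activation=torch.relu_)
#     x = torch.randn(8, 4, 64)
#     y = pfc(x)    # expected shape: [8, 4, 32]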
def reset_parameters(self):
    """Reinitialize the weight, the bias, and the batch norm parameters."""
    if self._kernel_initializer is None:
        variance_scaling_init(
            self._weight.data,
            gain=self._kernel_init_gain,
            nonlinearity=self._activation)
    else:
        self._kernel_initializer(self._weight.data)
    if self._use_bias:
        nn.init.constant_(self._bias.data, self._bias_init_value)
    if self._use_bn:
        self._bn.reset_parameters()
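# A minimal usage sketch (hypothetical; it assumes the owning layer saves the
# constructor arguments on ``self`` under the attribute names referenced
# above, e.g. ``self._kernel_initializer`` and ``self._kernel_init_gain``):
#
#     layer = FC(64, 32)        # an FC layer from this module (assumed name)
#     layer.reset_parameters()  # re-draws the weights with the same config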
def __init__(self,
             in_channels,
             out_channels,
             kernel_size,
             n,
             activation=torch.relu_,
             strides=1,
             padding=0,
             use_bias=None,
             use_bn=False,
             kernel_initializer=None,
             kernel_init_gain=1.0,
             bias_init_value=0.0):
    """A parallel ConvTranspose2D layer that can be used to perform ``n``
    independent 2D transposed convolutions in parallel.

    Args:
        in_channels (int): channels of the input image
        out_channels (int): channels of the output image
        kernel_size (int or tuple): size of the convolving kernel
        n (int): number of independent ``ConvTranspose2D`` layers
        activation (torch.nn.functional): activation applied to the output
        strides (int or tuple): stride of the transposed convolution
        padding (int or tuple): zero-padding added to the input
        use_bias (bool|None): whether to use a bias term. If None, will use
            ``not use_bn``
        use_bn (bool): whether to use batch normalization
        kernel_initializer (Callable): initializer for the conv_trans layer.
            If None is provided, a ``variance_scaling_initializer`` with gain
            as ``kernel_init_gain`` will be used.
        kernel_init_gain (float): a scaling factor (gain) applied to the std
            of the kernel init distribution. It will be ignored if
            ``kernel_initializer`` is not None.
        bias_init_value (float): a constant value the bias is initialized to
    """
    super(ParallelConvTranspose2D, self).__init__()
    if use_bias is None:
        use_bias = not use_bn
    self._activation = activation
    self._n = n
    self._in_channels = in_channels
    self._out_channels = out_channels
    self._kernel_size = common.tuplify2d(kernel_size)
    # A single grouped ConvTranspose2d with ``groups=n`` implements the n
    # parallel transposed convolutions.
    self._conv_trans2d = nn.ConvTranspose2d(
        in_channels * n,
        out_channels * n,
        kernel_size,
        groups=n,
        stride=strides,
        padding=padding,
        bias=use_bias)
    # Initialize each of the n groups independently so that every parallel
    # layer gets the same weight std as a single ConvTranspose2D would.
    for i in range(n):
        if kernel_initializer is None:
            variance_scaling_init(
                self._conv_trans2d.weight.data[i * in_channels:(i + 1) *
                                               in_channels],
                gain=kernel_init_gain,
                nonlinearity=self._activation)
        else:
            kernel_initializer(
                self._conv_trans2d.weight.data[i * in_channels:(i + 1) *
                                               in_channels])

    # [n*C, C', kernel_size, kernel_size]->[n, C, C', kernel_size, kernel_size]
    self._weight = self._conv_trans2d.weight.view(
        self._n, self._in_channels, self._out_channels,
        self._kernel_size[0], self._kernel_size[1])
    if use_bias:
        nn.init.constant_(self._conv_trans2d.bias.data, bias_init_value)
        # [n*C]->[n, C]
        self._bias = self._conv_trans2d.bias.view(self._n,
                                                  self._out_channels)
    else:
        self._bias = None

    if use_bn:
        self._bn = nn.BatchNorm2d(n * out_channels)
    else:
        self._bn = None
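# A minimal usage sketch (it assumes ``forward`` accepts an input of shape
# [B, n, C, H, W] and reshapes it to [B, n*C, H, W] for the grouped conv;
# the forward implementation is not shown here):
#
#     pct = ParallelConvTranspose2D(
#         in_channels=3, out_channels=16, kernel_size=4, n=5,
#         strides=2, padding=1)
#     img = torch.randn(8, 5, 3, 32, 32)
#     out = pct(img)    # expected shape: [8, 5, 16, 64, 64]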