def __init__(self,
             in_channels=1,
             out_channels=1,
             kernel_size=3,
             layers=10,
             conv_channels=64,
             dilation_factor=1,
             nonlinear_activation="LeakyReLU",
             nonlinear_activation_params={"negative_slope": 0.2},
             bias=True,
             use_weight_norm=True,
             ):
    """Initialize Parallel WaveGAN Discriminator module.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Kernel size of dilated convolution layers (must be odd).
        layers (int): Number of conv layers.
        conv_channels (int): Number of channels in conv layers.
        dilation_factor (int): Dilation factor. For example, if dilation_factor = 2,
            the dilation will be 2, 4, 8, ..., and so on.
        nonlinear_activation (str): Nonlinear function after each conv.
        nonlinear_activation_params (dict): Nonlinear function parameters.
        bias (bool): Whether to use bias parameter in conv.
        use_weight_norm (bool): Whether to use weight norm.
            If set to true, it will be applied to all of the conv layers.

    """
    super(ParallelWaveGANDiscriminator, self).__init__()
    assert (kernel_size - 1) % 2 == 0, "Kernel size must be odd."
    assert dilation_factor > 0, "Dilation factor must be > 0."
    self.conv_layers = torch.nn.ModuleList()
    conv_in_channels = in_channels
    for i in range(layers - 1):
        if i == 0:
            dilation = 1
        else:
            dilation = i if dilation_factor == 1 else dilation_factor ** i
            conv_in_channels = conv_channels
        padding = (kernel_size - 1) // 2 * dilation
        conv_layer = [
            Conv1d(conv_in_channels, conv_channels,
                   kernel_size=kernel_size, padding=padding,
                   dilation=dilation, bias=bias),
            getattr(torch.nn, nonlinear_activation)(
                inplace=True, **nonlinear_activation_params),
        ]
        self.conv_layers += conv_layer
    padding = (kernel_size - 1) // 2
    last_conv_layer = Conv1d(
        conv_in_channels, out_channels,
        kernel_size=kernel_size, padding=padding, bias=bias)
    self.conv_layers += [last_conv_layer]

    # apply weight norm
    if use_weight_norm:
        self.apply_weight_norm()
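# A minimal usage sketch for the discriminator above, assuming it is importable
# from the parallel_wavegan package as below. Since every conv uses "same"
# padding, the output keeps the input length (one score per waveform sample).
import torch
from parallel_wavegan.models import ParallelWaveGANDiscriminator

discriminator = ParallelWaveGANDiscriminator()
x = torch.randn(2, 1, 16000)  # (batch, channels, samples) of raw audio
y = discriminator(x)          # per-sample scores, shape (2, 1, 16000)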
def __init__(self,
             upsample_scales,
             upsample_activation="none",
             upsample_activation_params={},
             mode="nearest",
             freq_axis_kernel_size=1,
             aux_channels=80,
             aux_context_window=0):
    """Initialize convolution + upsampling network module.

    Args:
        upsample_scales (list): List of upsampling scales.
        upsample_activation (str): Activation function name.
        upsample_activation_params (dict): Arguments for the specified activation function.
        mode (str): Interpolation mode.
        freq_axis_kernel_size (int): Kernel size in the direction of frequency axis.
        aux_channels (int): Number of channels of pre-convolutional layer.
        aux_context_window (int): Context window size of the pre-convolutional layer.

    """
    super(ConvInUpsampleNetwork, self).__init__()
    # To capture wide-context information in conditional features
    kernel_size = 2 * aux_context_window + 1
    # NOTE(kan-bayashi): Here do not use padding because the input is already padded
    self.conv_in = Conv1d(aux_channels, aux_channels, kernel_size=kernel_size, bias=False)
    self.upsample = UpsampleNetwork(
        upsample_scales=upsample_scales,
        upsample_activation=upsample_activation,
        upsample_activation_params=upsample_activation_params,
        mode=mode,
        freq_axis_kernel_size=freq_axis_kernel_size,
    )
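# Shape sketch for the unpadded variant above (class assumed in scope, with the
# usual forward(c) that applies conv_in and then upsample). Because conv_in has
# no padding, the input must carry aux_context_window extra frames on each side.
import torch

net = ConvInUpsampleNetwork(upsample_scales=[4, 4, 4, 4], aux_context_window=2)
c = torch.randn(1, 80, 10 + 2 * 2)  # 10 target frames + 2 context frames per side
h = net(c)                          # conv_in trims the context: (1, 80, 10 * 256)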
def __init__(self,
             upsample_scales,
             nonlinear_activation=None,
             nonlinear_activation_params={},
             interpolate_mode="nearest",
             freq_axis_kernel_size=1,
             aux_channels=80,
             aux_context_window=0,
             use_causal_conv=False,
             ):
    super(ConvInUpsampleNetwork, self).__init__()
    self.aux_context_window = aux_context_window
    self.use_causal_conv = use_causal_conv and aux_context_window > 0
    # To capture wide-context information in conditional features.
    # A causal kernel covers only the current and past frames within the window.
    kernel_size = aux_context_window + 1 if use_causal_conv else 2 * aux_context_window + 1
    self.conv_in = Conv1d(aux_channels, aux_channels, kernel_size=kernel_size, bias=False)
    self.upsample = UpsampleNetwork(
        upsample_scales=upsample_scales,
        nonlinear_activation=nonlinear_activation,
        nonlinear_activation_params=nonlinear_activation_params,
        interpolate_mode=interpolate_mode,
        freq_axis_kernel_size=freq_axis_kernel_size,
        use_causal_conv=use_causal_conv,
    )
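# Quick check of the kernel-size arithmetic above: a causal kernel sees only
# the current and past frames, a non-causal one also looks ahead (values assumed).
aux_context_window = 2
causal_kernel = aux_context_window + 1         # 3: current + 2 past frames
noncausal_kernel = 2 * aux_context_window + 1  # 5: 2 past + current + 2 future
assert (causal_kernel, noncausal_kernel) == (3, 5)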
import numpy as np


def test_conv_initialization():
    # Conv1d / Conv1d1x1 / Conv2d are assumed to be the project's conv wrappers,
    # which zero-initialize biases and, for Conv2d, set every weight to
    # 1 / prod(kernel_size) (i.e. an averaging kernel), as asserted below.
    conv = Conv1d(10, 10, 3, bias=True)
    np.testing.assert_array_equal(conv.bias.data.numpy(),
                                  np.zeros_like(conv.bias.data.numpy()))
    conv1x1 = Conv1d1x1(10, 10, bias=True)
    np.testing.assert_array_equal(conv1x1.bias.data.numpy(),
                                  np.zeros_like(conv1x1.bias.data.numpy()))
    kernel_size = (10, 10)
    conv2d = Conv2d(10, 10, kernel_size, bias=True)
    np.testing.assert_array_equal(
        conv2d.weight.data.numpy(),
        np.ones_like(conv2d.weight.data.numpy()) / np.prod(kernel_size))
    np.testing.assert_array_equal(conv2d.bias.data.numpy(),
                                  np.zeros_like(conv2d.bias.data.numpy()))
    kernel_size = (1, 10)
    conv2d = Conv2d(10, 10, kernel_size, bias=True)
    np.testing.assert_array_equal(
        conv2d.weight.data.numpy(),
        np.ones_like(conv2d.weight.data.numpy()) / np.prod(kernel_size))
    np.testing.assert_array_equal(conv2d.bias.data.numpy(),
                                  np.zeros_like(conv2d.bias.data.numpy()))
def __init__(self,
             upsample_scales,
             nonlinear_activation=None,
             nonlinear_activation_params={},
             interpolate_mode="nearest",
             freq_axis_kernel_size=1,
             aux_channels=80,
             aux_context_window=2,
             aux_context_pad=True,
             use_causal_conv=False,
             ):
    """Initialize convolution + upsampling network module.

    Args:
        upsample_scales (list): List of upsampling scales.
        nonlinear_activation (str): Activation function name.
        nonlinear_activation_params (dict): Arguments for the specified activation function.
        interpolate_mode (str): Interpolation mode.
        freq_axis_kernel_size (int): Kernel size in the direction of frequency axis.
        aux_channels (int): Number of channels of pre-convolutional layer.
        aux_context_window (int): Context window size of the pre-convolutional layer.
        aux_context_pad (bool): Whether to pad the pre-convolutional layer;
            if False, the input is assumed to be already padded.
        use_causal_conv (bool): Whether to use causal structure.

    """
    super(ConvInUpsampleNetwork, self).__init__()
    self.aux_context_window = aux_context_window
    self.use_causal_conv = use_causal_conv and aux_context_window > 0
    # To capture wide-context information in conditional features
    kernel_size = aux_context_window + 1 if use_causal_conv else 2 * aux_context_window + 1
    # NOTE(kan-bayashi): Padding is skipped when the input is already padded
    padding = aux_context_window if aux_context_pad else 0
    self.conv_in = Conv1d(aux_channels, aux_channels,
                          kernel_size=kernel_size, padding=padding, bias=False)
    self.upsample = UpsampleNetwork(
        upsample_scales=upsample_scales,
        nonlinear_activation=nonlinear_activation,
        nonlinear_activation_params=nonlinear_activation_params,
        interpolate_mode=interpolate_mode,
        freq_axis_kernel_size=freq_axis_kernel_size,
        use_causal_conv=use_causal_conv,
    )
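# Usage sketch for the padded variant above (class assumed in scope, with the
# usual forward(c) applying conv_in and then upsample). With aux_context_pad=True
# the pre-conv keeps the frame count, so the output length is frames * prod(scales).
import torch

net = ConvInUpsampleNetwork(upsample_scales=[4, 4, 4, 4], aux_channels=80)
c = torch.randn(1, 80, 30)  # (batch, mel bins, frames)
h = net(c)                  # (1, 80, 30 * 256): one feature vector per waveform sample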