def __init__(
    self,
    in_channels: int = 513,
    out_channels: int = 192,
    hidden_channels: int = 192,
    kernel_size: int = 5,
    layers: int = 16,
    stacks: int = 1,
    base_dilation: int = 1,
    global_channels: int = -1,
    dropout_rate: float = 0.0,
    bias: bool = True,
    use_weight_norm: bool = True,
):
    """Initialize PosteriorEncoder module.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        hidden_channels (int): Number of hidden channels.
        kernel_size (int): Kernel size in WaveNet.
        layers (int): Number of layers of WaveNet.
        stacks (int): Number of repeated stacks of WaveNet.
        base_dilation (int): Base dilation factor.
        global_channels (int): Number of global conditioning channels.
        dropout_rate (float): Dropout rate.
        bias (bool): Whether to use bias parameters in conv.
        use_weight_norm (bool): Whether to apply weight norm.

    """
    super().__init__()

    # define modules
    self.input_conv = Conv1d(in_channels, hidden_channels, 1)
    # NOTE: in/out/aux channels are set to -1 because the first conv, last
    # conv, and auxiliary conditioning of WaveNet are all disabled below.
    self.encoder = WaveNet(
        in_channels=-1,
        out_channels=-1,
        kernel_size=kernel_size,
        layers=layers,
        stacks=stacks,
        base_dilation=base_dilation,
        residual_channels=hidden_channels,
        aux_channels=-1,
        gate_channels=hidden_channels * 2,
        skip_channels=hidden_channels,
        global_channels=global_channels,
        dropout_rate=dropout_rate,
        bias=bias,
        use_weight_norm=use_weight_norm,
        use_first_conv=False,
        use_last_conv=False,
        scale_residual=False,
        scale_skip_connect=True,
    )
    # project hidden features to the stacked mean and log-scale statistics
    self.proj = Conv1d(hidden_channels, out_channels * 2, 1)
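# Why `out_channels * 2` in self.proj: the 1x1 conv emits the mean and
# log-scale of the posterior Gaussian stacked along the channel axis, which
# the forward pass would split and reparameterize, as in typical VITS-style
# posterior encoders. A minimal standalone sketch under that assumption;
# `stats`, `m`, `logs`, and `z` are illustrative names, not from the source.
import torch

out_channels, T = 192, 100
stats = torch.randn(2, out_channels * 2, T)    # stand-in for self.proj output
m, logs = stats.split(out_channels, dim=1)     # mean / log-std halves
z = m + torch.randn_like(m) * torch.exp(logs)  # reparameterization trick
assert z.shape == (2, out_channels, T)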
def __init__(
    self,
    upsample_scales: List[int],
    nonlinear_activation: Optional[str] = None,
    nonlinear_activation_params: Dict[str, Any] = {},
    interpolate_mode: str = "nearest",
    freq_axis_kernel_size: int = 1,
    aux_channels: int = 80,
    aux_context_window: int = 0,
):
    """Initialize ConvInUpsampleNetwork module.

    Args:
        upsample_scales (list): List of upsampling scales.
        nonlinear_activation (Optional[str]): Activation function name.
        nonlinear_activation_params (Dict[str, Any]): Arguments for the
            specified activation function.
        interpolate_mode (str): Interpolation mode.
        freq_axis_kernel_size (int): Kernel size in the direction of
            frequency axis.
        aux_channels (int): Number of channels of pre-conv layer.
        aux_context_window (int): Context window size of the pre-conv layer.

    """
    super().__init__()
    self.aux_context_window = aux_context_window

    # To capture wide-context information in conditional features
    kernel_size = 2 * aux_context_window + 1
    # NOTE(kan-bayashi): Use pad here, which is not used in parallel_wavegan
    self.pad = torch.nn.ReplicationPad1d(aux_context_window)
    self.conv_in = Conv1d(
        aux_channels,
        aux_channels,
        kernel_size=kernel_size,
        bias=False,
    )
    self.upsample = UpsampleNetwork(
        upsample_scales=upsample_scales,
        nonlinear_activation=nonlinear_activation,
        nonlinear_activation_params=nonlinear_activation_params,
        interpolate_mode=interpolate_mode,
        freq_axis_kernel_size=freq_axis_kernel_size,
    )
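# How the pre-conv keeps the time resolution: ReplicationPad1d adds
# aux_context_window frames on each side, and a convolution with kernel size
# 2 * aux_context_window + 1 (and no padding of its own) consumes exactly
# those extra frames. A minimal standalone sketch with illustrative numbers,
# using plain torch.nn.Conv1d in place of the repo's Conv1d wrapper.
import torch

aux_context_window, aux_channels, T = 2, 80, 50
kernel_size = 2 * aux_context_window + 1  # 5
pad = torch.nn.ReplicationPad1d(aux_context_window)
conv_in = torch.nn.Conv1d(aux_channels, aux_channels, kernel_size, bias=False)

c = torch.randn(1, aux_channels, T)
assert conv_in(pad(c)).shape == c.shape   # (1, 80, 50): length preserved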
def __init__(
    self,
    in_channels: int = 1,
    out_channels: int = 1,
    kernel_size: int = 3,
    layers: int = 10,
    conv_channels: int = 64,
    dilation_factor: int = 1,
    nonlinear_activation: str = "LeakyReLU",
    nonlinear_activation_params: Dict[str, Any] = {"negative_slope": 0.2},
    bias: bool = True,
    use_weight_norm: bool = True,
):
    """Initialize ParallelWaveGANDiscriminator module.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Kernel size of the conv layers.
        layers (int): Number of conv layers.
        conv_channels (int): Number of channels in conv layers.
        dilation_factor (int): Dilation factor. For example, if
            dilation_factor = 2, the dilation will be 2, 4, 8, ...,
            and so on.
        nonlinear_activation (str): Nonlinear function after each conv.
        nonlinear_activation_params (Dict[str, Any]): Nonlinear function
            parameters.
        bias (bool): Whether to use bias parameter in conv.
        use_weight_norm (bool): Whether to use weight norm. If set to true,
            it will be applied to all of the conv layers.

    """
    super().__init__()
    assert (kernel_size - 1) % 2 == 0, "Even kernel size is not supported."
    assert dilation_factor > 0, "Dilation factor must be > 0."
    self.conv_layers = torch.nn.ModuleList()
    conv_in_channels = in_channels
    for i in range(layers - 1):
        if i == 0:
            dilation = 1
        else:
            dilation = i if dilation_factor == 1 else dilation_factor**i
            conv_in_channels = conv_channels
        padding = (kernel_size - 1) // 2 * dilation
        conv_layer = [
            Conv1d(
                conv_in_channels,
                conv_channels,
                kernel_size=kernel_size,
                padding=padding,
                dilation=dilation,
                bias=bias,
            ),
            getattr(torch.nn, nonlinear_activation)(
                inplace=True, **nonlinear_activation_params
            ),
        ]
        self.conv_layers += conv_layer
    padding = (kernel_size - 1) // 2
    last_conv_layer = Conv1d(
        conv_in_channels,
        out_channels,
        kernel_size=kernel_size,
        padding=padding,
        bias=bias,
    )
    self.conv_layers += [last_conv_layer]

    # apply weight norm
    if use_weight_norm:
        self.apply_weight_norm()
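# How the dilation schedule grows: with dilation_factor == 1 the dilation of
# layer i is simply i for i >= 1 (so 1, 1, 2, 3, ...), otherwise it is
# dilation_factor ** i (1, 2, 4, 8, ... for a factor of 2); padding scales
# with the dilation so every conv preserves the signal length. A standalone
# trace of the loop above with illustrative settings, for inspection only.
kernel_size, layers, dilation_factor = 3, 10, 2

for i in range(layers - 1):
    dilation = 1 if i == 0 else (i if dilation_factor == 1 else dilation_factor**i)
    padding = (kernel_size - 1) // 2 * dilation
    print(f"layer {i}: dilation={dilation}, padding={padding}")
# layer 0: dilation=1, padding=1
# layer 1: dilation=2, padding=2
# layer 2: dilation=4, padding=4
# ...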