Example #1
    def __init__(
        self,
        in_channels: int = 513,
        out_channels: int = 192,
        hidden_channels: int = 192,
        kernel_size: int = 5,
        layers: int = 16,
        stacks: int = 1,
        base_dilation: int = 1,
        global_channels: int = -1,
        dropout_rate: float = 0.0,
        bias: bool = True,
        use_weight_norm: bool = True,
    ):
        """Initilialize PosteriorEncoder module.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            hidden_channels (int): Number of hidden channels.
            kernel_size (int): Kernel size in WaveNet.
            layers (int): Number of layers of WaveNet.
            stacks (int): Number of repeat stacking of WaveNet.
            base_dilation (int): Base dilation factor.
            global_channels (int): Number of global conditioning channels.
            dropout_rate (float): Dropout rate.
            bias (bool): Whether to use bias parameters in conv.
            use_weight_norm (bool): Whether to apply weight norm.

        """
        super().__init__()

        # define modules
        self.input_conv = Conv1d(in_channels, hidden_channels, 1)
        self.encoder = WaveNet(
            # first/last convs are disabled below (use_first_conv/use_last_conv=False),
            # so the corresponding channel sizes are unused
            in_channels=-1,
            out_channels=-1,
            kernel_size=kernel_size,
            layers=layers,
            stacks=stacks,
            base_dilation=base_dilation,
            residual_channels=hidden_channels,
            aux_channels=-1,  # no auxiliary feature conditioning
            gate_channels=hidden_channels * 2,
            skip_channels=hidden_channels,
            global_channels=global_channels,
            dropout_rate=dropout_rate,
            bias=bias,
            use_weight_norm=use_weight_norm,
            use_first_conv=False,
            use_last_conv=False,
            scale_residual=False,
            scale_skip_connect=True,
        )
        self.proj = Conv1d(hidden_channels, out_channels * 2, 1)
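
A minimal usage sketch for the module above, assuming the constructor belongs to
ESPnet2's VITS PosteriorEncoder, whose forward(x, x_lengths, g=None) returns
(z, m, logs, x_mask); the import path below is an assumption:

import torch
from espnet2.gan_tts.vits.posterior_encoder import PosteriorEncoder  # assumed path

encoder = PosteriorEncoder()  # defaults: 513 input -> 192 latent channels
x = torch.randn(2, 513, 100)         # (batch, in_channels, frames), e.g. linear spectrogram
x_lengths = torch.tensor([100, 80])  # valid frame counts per batch item
z, m, logs, x_mask = encoder(x, x_lengths)
print(z.shape)  # torch.Size([2, 192, 100])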
Example #2
    def __init__(
        self,
        upsample_scales: List[int],
        nonlinear_activation: Optional[str] = None,
        nonlinear_activation_params: Dict[str, Any] = {},
        interpolate_mode: str = "nearest",
        freq_axis_kernel_size: int = 1,
        aux_channels: int = 80,
        aux_context_window: int = 0,
    ):
        """Initialize ConvInUpsampleNetwork module.

        Args:
            upsample_scales (list): List of upsampling scales.
            nonlinear_activation (Optional[str]): Activation function name.
            nonlinear_activation_params (Dict[str, Any]): Arguments for the specified
                activation function.
            interpolate_mode (str): Interpolation mode.
            freq_axis_kernel_size (int): Kernel size in the direction of
                frequency axis.
            aux_channels (int): Number of channels of pre-conv layer.
            aux_context_window (int): Context window size of the pre-conv layer.

        """
        super().__init__()
        self.aux_context_window = aux_context_window
        # To capture wide-context information in conditional features
        kernel_size = 2 * aux_context_window + 1
        # NOTE(kan-bayashi): Use pad here, which is not used in parallel_wavegan
        self.pad = torch.nn.ReplicationPad1d(aux_context_window)
        self.conv_in = Conv1d(
            aux_channels,
            aux_channels,
            kernel_size=kernel_size,
            bias=False,
        )
        self.upsample = UpsampleNetwork(
            upsample_scales=upsample_scales,
            nonlinear_activation=nonlinear_activation,
            nonlinear_activation_params=nonlinear_activation_params,
            interpolate_mode=interpolate_mode,
            freq_axis_kernel_size=freq_axis_kernel_size,
        )
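
A quick shape check, assuming this is the constructor of parallel_wavegan's
ConvInUpsampleNetwork, whose forward(c) upsamples auxiliary features in time by
the product of upsample_scales; the import path below is an assumption:

import torch
from parallel_wavegan.layers import ConvInUpsampleNetwork  # assumed path

upsampler = ConvInUpsampleNetwork(upsample_scales=[4, 4, 4, 4])  # 256x in total
c = torch.randn(2, 80, 10)  # (batch, aux_channels, frames), e.g. mel-spectrogram
c_up = upsampler(c)
print(c_up.shape)  # torch.Size([2, 80, 2560]) = 10 frames * 4*4*4*4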
Example #3
    def __init__(
        self,
        in_channels: int = 1,
        out_channels: int = 1,
        kernel_size: int = 3,
        layers: int = 10,
        conv_channels: int = 64,
        dilation_factor: int = 1,
        nonlinear_activation: str = "LeakyReLU",
        nonlinear_activation_params: Dict[str, Any] = {"negative_slope": 0.2},
        bias: bool = True,
        use_weight_norm: bool = True,
    ):
        """Initialize ParallelWaveGANDiscriminator module.

        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            kernel_size (int): Kernel size of conv layers.
            layers (int): Number of conv layers.
            conv_channels (int): Number of channels in conv layers.
            dilation_factor (int): Dilation factor. For example, if dilation_factor = 2,
                the dilation will be 2, 4, 8, ..., and so on.
            nonlinear_activation (str): Nonlinear function after each conv.
            nonlinear_activation_params (Dict[str, Any]): Nonlinear function parameters.
            bias (bool): Whether to use bias parameter in conv.
            use_weight_norm (bool): Whether to use weight norm.
                If set to True, it will be applied to all of the conv layers.

        """
        super().__init__()
        assert (kernel_size - 1) % 2 == 0, "Even kernel sizes are not supported."
        assert dilation_factor > 0, "Dilation factor must be > 0."
        self.conv_layers = torch.nn.ModuleList()
        conv_in_channels = in_channels
        # stack of dilated conv layers with a growing receptive field
        for i in range(layers - 1):
            if i == 0:
                dilation = 1
            else:
                # dilation grows linearly when dilation_factor == 1, exponentially otherwise
                dilation = i if dilation_factor == 1 else dilation_factor**i
                conv_in_channels = conv_channels
            padding = (kernel_size - 1) // 2 * dilation
            conv_layer = [
                Conv1d(
                    conv_in_channels,
                    conv_channels,
                    kernel_size=kernel_size,
                    padding=padding,
                    dilation=dilation,
                    bias=bias,
                ),
                getattr(torch.nn, nonlinear_activation)(
                    inplace=True, **nonlinear_activation_params
                ),
            ]
            self.conv_layers += conv_layer
        padding = (kernel_size - 1) // 2
        last_conv_layer = Conv1d(
            conv_in_channels,
            out_channels,
            kernel_size=kernel_size,
            padding=padding,
            bias=bias,
        )
        self.conv_layers += [last_conv_layer]

        # apply weight norm
        if use_weight_norm:
            self.apply_weight_norm()
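
A shape-preserving sanity check, assuming the constructor above belongs to
parallel_wavegan's ParallelWaveGANDiscriminator, whose forward(x) maps a
waveform (B, 1, T) to per-sample scores of the same length; the import path
below is an assumption:

import torch
from parallel_wavegan.models import ParallelWaveGANDiscriminator  # assumed path

disc = ParallelWaveGANDiscriminator()
x = torch.randn(2, 1, 16000)  # (batch, in_channels, samples): 1 s at 16 kHz
scores = disc(x)
print(scores.shape)  # torch.Size([2, 1, 16000]); padding preserves the length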