예제 #1
0
 def __init__(
     self,
     channels,
     hidden_channels,
     kernel_size,
     dilation_rate,
     num_layers,
     dropout_p=0,
     cond_channels=0,
     mean_only=False,
 ):
     """Build the input conv, the ``WN`` stack and a zero-initialised output conv.

     Args:
         channels: Total channel count; must be even. Half of it is stored as
             ``half_channels`` and used as the input width of ``pre``.
         hidden_channels: Output width of ``pre``, input width of ``post``;
             also passed as the first two positional arguments of ``WN``.
         kernel_size: Forwarded unchanged to ``WN``.
         dilation_rate: Forwarded unchanged to ``WN``.
         num_layers: Forwarded unchanged to ``WN``.
         dropout_p: Forwarded to ``WN`` as ``dropout_p``. Defaults to 0.
         cond_channels: Forwarded to ``WN`` as ``c_in_channels``. Defaults to 0.
         mean_only: Stored on the instance. The output width of ``post`` is
             ``half_channels * (2 - mean_only)``, i.e. one half-width group
             when true and two when false. Defaults to False.

     Raises:
         AssertionError: If ``channels`` is odd.
     """
     assert channels % 2 == 0, "channels should be divisible by 2"
     super().__init__()

     half = channels // 2
     self.half_channels = half
     self.mean_only = mean_only

     # Input layer: 1x1 conv lifting half of the channels to the hidden width.
     self.pre = nn.Conv1d(half, hidden_channels, 1)

     # Coupling layers.
     self.enc = WN(
         hidden_channels,
         hidden_channels,
         kernel_size,
         dilation_rate,
         num_layers,
         dropout_p=dropout_p,
         c_in_channels=cond_channels,
     )

     # Output layer: 1x1 conv back down from the hidden width.
     post_channels = half * (2 - mean_only)
     self.post = nn.Conv1d(hidden_channels, post_channels, 1)

     # Zeroing the last layer makes the affine coupling layers do nothing at
     # first, which helps with training stability.
     for param in (self.post.weight, self.post.bias):
         param.data.zero_()
예제 #2
0
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        hidden_channels: int,
        kernel_size: int,
        dilation_rate: int,
        num_layers: int,
        cond_channels=0,
    ):
        """Set up the layers of the VITS posterior encoder.

        Data flow of the module::

            x -> conv1x1() -> WaveNet() (non-causal) -> conv1x1() -> split() -> [m, s] -> sample(m, s) -> z

        Only the three layers (``pre``, ``enc``, ``proj``) are created here;
        the split and sampling steps of the diagram are not part of this
        constructor.

        Args:
            in_channels (int): Channel count of the input tensor.
            out_channels (int): Channel count of the output tensor. ``proj``
                emits twice this many channels.
            hidden_channels (int): Channel count used between ``pre`` and ``proj``.
            kernel_size (int): Kernel size of the WaveNet convolution layers.
            dilation_rate (int): Dilation rate of the WaveNet layers.
            num_layers (int): Number of WaveNet layers.
            cond_channels (int, optional): Channel count of the conditioning
                tensor, forwarded to ``WN`` as ``c_in_channels``. Defaults to 0.
        """
        super().__init__()

        # Keep every constructor argument available as a plain attribute.
        self.in_channels, self.out_channels = in_channels, out_channels
        self.hidden_channels = hidden_channels
        self.kernel_size, self.dilation_rate = kernel_size, dilation_rate
        self.num_layers = num_layers
        self.cond_channels = cond_channels

        # 1x1 conv: input width -> hidden width.
        self.pre = nn.Conv1d(in_channels, hidden_channels, 1)

        # WaveNet stack working at the hidden width.
        self.enc = WN(
            hidden_channels,
            hidden_channels,
            kernel_size,
            dilation_rate,
            num_layers,
            c_in_channels=cond_channels,
        )

        # 1x1 conv: hidden width -> two groups of `out_channels` channels.
        projected_channels = out_channels * 2
        self.proj = nn.Conv1d(hidden_channels, projected_channels, 1)