Ejemplo n.º 1
0
 def __init__(self, in_channels, out_channels, hidden_channels, params):
     """Create the residual conv block, the 1x1 post convolution and the postnet.

     Args:
         in_channels: Input channel count handed to ``ResidualConv1dBNBlock``.
         out_channels: Output channel count of the final 1x1 convolution
             in the postnet.
         hidden_channels: Channel count used by every intermediate layer.
         params: Mapping expanded as keyword arguments into
             ``ResidualConv1dBNBlock``. It must contain ``"kernel_size"``,
             which is also forwarded to the postnet's ``Conv1dBNBlock``.
     """
     super().__init__()
     width = hidden_channels
     self.res_conv_block = ResidualConv1dBNBlock(in_channels, width, width, **params)
     self.post_conv = nn.Conv1d(width, width, kernel_size=1)
     # Postnet: a Conv1dBNBlock (num_conv_blocks=2) followed by a 1x1
     # convolution that maps the hidden width to out_channels.
     refine = Conv1dBNBlock(width, width, width, params["kernel_size"], 1, num_conv_blocks=2)
     project = nn.Conv1d(width, out_channels, kernel_size=1)
     self.postnet = nn.Sequential(refine, project)
Ejemplo n.º 2
0
 def __init__(self, in_channels, out_channels, hidden_channels, params):
     """Create a fixed-configuration conv prenet and the relative position transformer.

     Args:
         in_channels: Input channel count handed to the prenet.
         out_channels: Second positional argument of
             ``RelativePositionTransformer``.
         hidden_channels: Channel count produced by the prenet and passed
             to the transformer as its first and third positional arguments.
         params: Mapping expanded as keyword arguments into
             ``RelativePositionTransformer``.
     """
     super().__init__()
     # The prenet hyper-parameters are hard-coded; ``params`` only
     # configures the transformer.
     prenet_config = {
         "kernel_size": 5,
         "num_res_blocks": 3,
         "num_conv_blocks": 1,
         "dilations": [1, 1, 1],
     }
     self.prenet = ResidualConv1dBNBlock(
         in_channels, hidden_channels, hidden_channels, **prenet_config
     )
     self.rel_pos_transformer = RelativePositionTransformer(
         hidden_channels, out_channels, hidden_channels, **params
     )
Ejemplo n.º 3
0
    def __init__(self, in_channels, out_channels, hidden_channels, params):
        """Create the prenet, the residual conv block and the postnet.

        Args:
            in_channels: Input channel count of the prenet's 1x1 convolution.
            out_channels: Output channel count of the postnet's last 1x1
                convolution.
            hidden_channels: Channel count used by every intermediate layer.
            params: Mapping expanded as keyword arguments into
                ``ResidualConv1dBNBlock``.
        """
        super().__init__()
        width = hidden_channels

        # Prenet: 1x1 convolution to the hidden width followed by ReLU.
        self.prenet = nn.Sequential(
            nn.Conv1d(in_channels, width, kernel_size=1),
            nn.ReLU(),
        )

        self.res_conv_block = ResidualConv1dBNBlock(width, width, width, **params)

        # Postnet: 1x1 conv -> ReLU -> batch norm -> 1x1 conv to out_channels.
        self.postnet = nn.Sequential(
            nn.Conv1d(width, width, kernel_size=1),
            nn.ReLU(),
            nn.BatchNorm1d(width),
            nn.Conv1d(width, out_channels, kernel_size=1),
        )
Ejemplo n.º 4
0
    def __init__(
        self,
        num_chars,
        out_channels,
        hidden_channels,
        hidden_channels_dp,
        encoder_type,
        encoder_params,
        dropout_p_dp=0.1,
        mean_only=False,
        use_prenet=True,
        c_in_channels=0,
    ):
        """Build the embedding, the selected encoder stack and the output heads.

        Args:
            num_chars: Number of rows of the character embedding table.
            out_channels: Output channel count of the ``proj_m`` and
                ``proj_s`` 1x1 convolutions.
            hidden_channels: Embedding size and channel count used by the
                prenet, encoder and postnet layers.
            hidden_channels_dp: Second positional argument of
                ``DurationPredictor``.
            encoder_type: Case-insensitive encoder selector. One of
                ``"rel_pos_transformer"``, ``"gated_conv"``,
                ``"residual_conv_bn"`` or ``"time_depth_separable"``.
            encoder_params: Mapping expanded as keyword arguments into the
                selected encoder class.
            dropout_p_dp: Fourth positional argument of ``DurationPredictor``.
            mean_only: When true, ``proj_s`` is not created.
            use_prenet: When true, a prenet is created for the encoder types
                that define one. It is not consulted for ``"gated_conv"``.
            c_in_channels: Added to ``hidden_channels`` to form the first
                argument of ``DurationPredictor``.

        Raises:
            ValueError: If ``encoder_type`` is not one of the supported names.
        """
        super().__init__()
        # class arguments
        self.num_chars = num_chars
        self.out_channels = out_channels
        self.hidden_channels = hidden_channels
        self.hidden_channels_dp = hidden_channels_dp
        self.dropout_p_dp = dropout_p_dp
        self.mean_only = mean_only
        self.use_prenet = use_prenet
        self.c_in_channels = c_in_channels
        self.encoder_type = encoder_type
        # embedding layer
        self.emb = nn.Embedding(num_chars, hidden_channels)
        nn.init.normal_(self.emb.weight, 0.0, hidden_channels ** -0.5)
        # init encoder module
        # Normalise the selector once instead of lower-casing it per branch.
        encoder_kind = encoder_type.lower()
        # NOTE: ``self.prenet`` and ``self.postnet`` are only assigned in the
        # branches below that create them; they are deliberately left
        # undefined otherwise so existing attribute checks keep working.
        if encoder_kind == "rel_pos_transformer":
            if use_prenet:
                self.prenet = ResidualConv1dLayerNormBlock(
                    hidden_channels, hidden_channels, hidden_channels, kernel_size=5, num_layers=3, dropout_p=0.5
                )
            self.encoder = RelativePositionTransformer(
                hidden_channels, hidden_channels, hidden_channels, **encoder_params
            )
        elif encoder_kind == "gated_conv":
            self.encoder = GatedConvBlock(hidden_channels, **encoder_params)
        elif encoder_kind == "residual_conv_bn":
            if use_prenet:
                self.prenet = nn.Sequential(nn.Conv1d(hidden_channels, hidden_channels, 1), nn.ReLU())
            self.encoder = ResidualConv1dBNBlock(hidden_channels, hidden_channels, hidden_channels, **encoder_params)
            self.postnet = nn.Sequential(
                nn.Conv1d(hidden_channels, hidden_channels, 1), nn.BatchNorm1d(hidden_channels)
            )
        elif encoder_kind == "time_depth_separable":
            if use_prenet:
                self.prenet = ResidualConv1dLayerNormBlock(
                    hidden_channels, hidden_channels, hidden_channels, kernel_size=5, num_layers=3, dropout_p=0.5
                )
            self.encoder = TimeDepthSeparableConvBlock(
                hidden_channels, hidden_channels, hidden_channels, **encoder_params
            )
        else:
            # Report the rejected value and the accepted options so a
            # misconfiguration can be diagnosed from the message alone.
            raise ValueError(
                f" [!] Unknown encoder type: {encoder_type!r}. Expected one of "
                "'rel_pos_transformer', 'gated_conv', 'residual_conv_bn', 'time_depth_separable'."
            )

        # final projection layers
        self.proj_m = nn.Conv1d(hidden_channels, out_channels, 1)
        if not mean_only:
            self.proj_s = nn.Conv1d(hidden_channels, out_channels, 1)
        # duration predictor
        self.duration_predictor = DurationPredictor(
            hidden_channels + c_in_channels, hidden_channels_dp, 3, dropout_p_dp
        )