Ejemplo n.º 1
0
 def __init__(self, num_mel=80, embedding_size=512):
     """Create the sub-modules of the TransformerTTS model.

     Args:
         num_mel: Second argument given to ``Linear`` when building
             ``mel_linear`` (presumably the number of mel channels --
             confirm against ``Linear``).
         embedding_size: First argument given to ``Linear`` for both
             ``stop_linear`` and ``mel_linear``.
     """
     super(TransformerTTS, self).__init__()

     # Core networks, each built with its default constructor arguments.
     self.encoder = Encoder()
     self.decoder = Decoder()
     self.postnet = PostNet()

     # Both heads share the same input size; the stop head has a single
     # output and is constructed with w_init='sigmoid'.
     head_in = embedding_size
     self.stop_linear = Linear(head_in, 1, w_init='sigmoid')
     self.mel_linear = Linear(head_in, num_mel)
Ejemplo n.º 2
0
 def __init__(self, use_postnet=True, n_spkers=1, n_emotes=1):
     """Build the FastSpeech2 sub-modules, with optional speaker/emotion tables.

     Args:
         use_postnet: When true, a ``PostNet`` is created as ``self.postnet``.
         n_spkers: Size passed to the speaker ``Embedding`` table; only used
             when ``hp.use_spk_embed`` is true.
         n_emotes: Size passed to the emotion ``Embedding`` table; only used
             when ``hp.use_emo_embed`` is true. This used to be read without
             ever being defined, so enabling emotion embeddings raised
             ``NameError``; it is now an explicit parameter whose default
             mirrors ``n_spkers``.
     """
     super(FastSpeech2, self).__init__()

     ### Speaker Embedding Table ###
     self.use_spk_embed = hp.use_spk_embed
     if self.use_spk_embed:
         self.n_spkers = n_spkers
         self.spk_embed_dim = hp.spk_embed_dim
         self.spk_embed_weight_std = hp.spk_embed_weight_std
         self.embed_speakers = Embedding(n_spkers, self.spk_embed_dim, padding_idx=None, std=self.spk_embed_weight_std)

     ### Emotion Embedding Table ###
     self.use_emo_embed = hp.use_emo_embed
     if self.use_emo_embed:
         self.n_emotes = n_emotes
         self.emo_embed_dim = hp.emo_embed_dim
         self.emo_embed_weight_std = hp.emo_embed_weight_std
         self.embed_emotions = Embedding(n_emotes, self.emo_embed_dim, padding_idx=None, std=self.emo_embed_weight_std)

     ### Encoder, Speaker Integrator, Variance Adaptor, Decoder, Postnet ###
     self.encoder = Encoder()
     # The integrator is only created when speaker embeddings are enabled.
     if self.use_spk_embed:
         self.speaker_integrator = SpeakerIntegrator()
     self.variance_adaptor = VarianceAdaptor()
     self.decoder = Decoder()
     self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)

     # The postnet is optional; the flag is stored on the instance as well.
     self.use_postnet = use_postnet
     if self.use_postnet:
         self.postnet = PostNet()
Ejemplo n.º 3
0
    def __init__(self):
        """Create the FastSpeech2 sub-modules with their default arguments.

        The sizes of ``mel_linear`` are read from ``hp.decoder_hidden`` and
        ``hp.n_mel_channels``.
        """
        super(FastSpeech2, self).__init__()

        # Main pipeline stages, in construction order.
        self.encoder = Encoder()
        self.variance_adaptor = VarianceAdaptor()
        self.decoder = Decoder()

        # Projection built from the hyper-parameter sizes, then the postnet.
        linear_in = hp.decoder_hidden
        linear_out = hp.n_mel_channels
        self.mel_linear = Linear(linear_in, linear_out)
        self.postnet = PostNet()
Ejemplo n.º 4
0
    def __init__(self):
        """Create the FastSpeech sub-modules with their default arguments.

        The sizes of ``mel_linear`` are read from ``hp.decoder_output_size``
        and ``hp.num_mels``.
        """
        super(FastSpeech, self).__init__()

        # Main pipeline stages, in construction order.
        self.encoder = Encoder()
        self.length_regulator = LengthRegulator()
        self.decoder = Decoder()

        # Projection built from the hyper-parameter sizes, then the postnet.
        linear_in = hp.decoder_output_size
        linear_out = hp.num_mels
        self.mel_linear = Linear(linear_in, linear_out)
        self.postnet = PostNet()
Ejemplo n.º 5
0
    def __init__(self, use_postnet=True):
        """Create the STYLER sub-modules.

        Args:
            use_postnet: When true, a ``PostNet`` is created as
                ``self.postnet``; the flag itself is stored as
                ``self.use_postnet``.
        """
        super(STYLER, self).__init__()

        self.style_modeling = StyleModeling()

        self.decoder = Decoder()
        # Linear layer sized from hp.decoder_hidden and hp.n_mel_channels.
        self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)

        self.use_postnet = use_postnet
        if self.use_postnet:
            self.postnet = PostNet()
    def __init__(self, use_postnet=True):
        """Create the FastSpeech2 sub-modules, using ``TacotronDuration`` as the variance adaptor.

        Args:
            use_postnet: When true, a ``PostNet`` is created as
                ``self.postnet``; the flag itself is stored as
                ``self.use_postnet``.
        """
        super(FastSpeech2, self).__init__()

        # Main pipeline stages, in construction order.
        self.encoder = Encoder()
        self.variance_adaptor = TacotronDuration()
        self.decoder = Decoder()

        # Linear layer sized from the hyper-parameters.
        decoder_dim = hp.decoder_hidden
        mel_dim = hp.n_mel_channels
        self.mel_linear = nn.Linear(decoder_dim, mel_dim)

        # Remember the flag and build the postnet only when requested.
        self.use_postnet = use_postnet
        if use_postnet:
            self.postnet = PostNet()
Ejemplo n.º 7
0
    def __init__(self, use_postnet=True):
        """Create the FastSpeech2 sub-modules, with output layers chosen by ``hp.vocoder``.

        When ``hp.vocoder`` equals ``'WORLD'`` two linear layers
        (``ap_linear`` and ``sp_linear``) are created; otherwise a single
        ``mel_linear`` is created.

        Args:
            use_postnet: When true, a ``PostNet`` is created as
                ``self.postnet``; the flag itself is stored as
                ``self.use_postnet``.
        """
        super(FastSpeech2, self).__init__()

        # NOTE: construction of a GST module (self.gst) is disabled here.
        self.encoder = Encoder()
        self.variance_adaptor = VarianceAdaptor()
        self.decoder = Decoder()

        if hp.vocoder != 'WORLD':
            # Default path: a single linear layer.
            self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
        else:
            # WORLD path: separate layers sized by hp.n_ap_channels and
            # hp.n_sp_channels (a dedicated f0 decoder is disabled).
            self.ap_linear = nn.Linear(hp.decoder_hidden, hp.n_ap_channels)
            self.sp_linear = nn.Linear(hp.decoder_hidden, hp.n_sp_channels)

        # Remember the flag and build the postnet only when requested.
        self.use_postnet = use_postnet
        if use_postnet:
            self.postnet = PostNet()