def __init__(self, num_chars, embedding_dim=256, linear_dim=1025, mel_dim=80,
             r=5, padding_idx=None, memory_size=5, attn_windowing=False,
             forward_attention=False):
    super(Tacotron, self).__init__()
    self.r = r
    self.mel_dim = mel_dim
    self.linear_dim = linear_dim
    # Character embedding, initialized with a small-std normal distribution.
    self.embedding = nn.Embedding(num_chars, embedding_dim, padding_idx=padding_idx)
    self.embedding.weight.data.normal_(0, 0.3)
    self.encoder = Encoder(embedding_dim)
    self.decoder = Decoder(256, mel_dim, r, memory_size, attn_windowing,
                           forward_attention)
    # PostCBHG ends in a bidirectional GRU, hence the `* 2` on its feature size.
    self.postnet = PostCBHG(mel_dim)
    self.last_linear = nn.Sequential(
        nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim),
        nn.Sigmoid())
def __init__(self, num_chars, num_speakers, r=5, linear_dim=1025, mel_dim=80,
             memory_size=5, attn_win=False, attn_norm="sigmoid",
             prenet_type="original", prenet_dropout=True, forward_attn=False,
             trans_agent=False, forward_attn_mask=False, location_attn=True,
             separate_stopnet=True):
    super(Tacotron, self).__init__()
    self.r = r
    self.mel_dim = mel_dim
    self.linear_dim = linear_dim
    self.embedding = nn.Embedding(num_chars, 256)
    self.embedding.weight.data.normal_(0, 0.3)
    # A speaker embedding table is only created for multi-speaker training.
    if num_speakers > 1:
        self.speaker_embedding = nn.Embedding(num_speakers, 256)
        self.speaker_embedding.weight.data.normal_(0, 0.3)
    self.encoder = Encoder(256)
    self.decoder = Decoder(256, mel_dim, r, memory_size, attn_win, attn_norm,
                           prenet_type, prenet_dropout, forward_attn,
                           trans_agent, forward_attn_mask, location_attn,
                           separate_stopnet)
    self.postnet = PostCBHG(mel_dim)
    self.last_linear = nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim)
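# A minimal usage sketch, not taken from the source: instantiating the
# multi-speaker constructor above with illustrative values (num_chars=61 and
# num_speakers=2 are assumptions). It relies on the surrounding module's
# Tacotron, Encoder, Decoder, and PostCBHG definitions being importable; only
# the constructor signature shown above is exercised.
model = Tacotron(num_chars=61, num_speakers=2, r=5, linear_dim=1025, mel_dim=80)
print(model.embedding.weight.shape)          # torch.Size([61, 256])
print(model.speaker_embedding.weight.shape)  # created because num_speakers > 1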
def test_in_out(self):
    layer = Encoder(128)
    dummy_input = T.rand(4, 8, 128)
    print(layer)
    output = layer(dummy_input)
    print(output.shape)
    assert output.shape[0] == 4
    assert output.shape[1] == 8
    assert output.shape[2] == 256  # 128 * 2 BiRNN
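# Why the last assert expects 256: the encoder ends in a bidirectional RNN, and
# PyTorch concatenates the forward and backward hidden states, so an input
# feature size of 128 comes out as 128 * 2 = 256. A self-contained sketch of
# just that effect, using a plain nn.GRU rather than the project's Encoder:
import torch
import torch.nn as nn

gru = nn.GRU(input_size=128, hidden_size=128, batch_first=True,
             bidirectional=True)
x = torch.rand(4, 8, 128)        # (batch, time, features)
out, _ = gru(x)
assert out.shape == (4, 8, 256)  # both directions concatenated on the last dim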
def __init__(self, embedding_dim=256, linear_dim=1025, mel_dim=80, r=5,
             padding_idx=None):
    super(Tacotron, self).__init__()
    self.r = r
    self.mel_dim = mel_dim
    self.linear_dim = linear_dim
    self.embedding = nn.Embedding(len(symbols), embedding_dim,
                                  padding_idx=padding_idx)
    print(" | > Number of characters : {}".format(len(symbols)))
    self.embedding.weight.data.normal_(0, 0.3)
    self.encoder = Encoder(embedding_dim)
    self.decoder = Decoder(256, mel_dim, r)
    # CBHG postnet with a bidirectional GRU, so its output is mel_dim * 2 wide.
    self.postnet = CBHG(mel_dim, K=8, projections=[256, mel_dim])
    self.last_linear = nn.Linear(mel_dim * 2, linear_dim)
def __init__(self, embedding_dim=256, linear_dim=1025, mel_dim=80, r=5,
             padding_idx=None):
    super(Tacotron, self).__init__()
    self.r = r
    self.mel_dim = mel_dim
    self.linear_dim = linear_dim
    self.embedding = nn.Embedding(len(symbols), embedding_dim,
                                  padding_idx=padding_idx)
    print(" | > Number of characters : {}".format(len(symbols)))
    self.embedding.weight.data.normal_(0, 0.3)
    self.encoder = Encoder(embedding_dim)
    self.decoder = Decoder(256, mel_dim, r)
    self.postnet = PostCBHG(mel_dim)
    # The Sigmoid keeps the predicted linear spectrogram values in [0, 1].
    self.last_linear = nn.Sequential(
        nn.Linear(self.postnet.cbhg.gru_features * 2, linear_dim),
        nn.Sigmoid())
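# Shape sketch for the linear-spectrogram head above, not from the source. The
# PostCBHG GRU feature size is taken as 128 here purely for illustration and
# may differ in the actual module; the point is that Linear + Sigmoid maps the
# bidirectional GRU output to `linear_dim` values per frame, squashed to [0, 1].
import torch
import torch.nn as nn

gru_features = 128                   # assumption for illustration only
linear_dim = 1025
last_linear = nn.Sequential(
    nn.Linear(gru_features * 2, linear_dim),
    nn.Sigmoid())
frames = torch.rand(4, 30, gru_features * 2)    # (batch, frames, features)
spec = last_linear(frames)
assert spec.shape == (4, 30, linear_dim)
assert spec.min() >= 0 and spec.max() <= 1      # Sigmoid output range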