def __init__(self,
             n_src_vocab=len(symbols) + 1,
             len_max_seq=hp.max_seq_len,
             d_word_vec=hp.encoder_hidden,
             n_layers=hp.encoder_layer,
             n_head=hp.encoder_head,
             d_k=hp.encoder_hidden // hp.encoder_head,
             d_v=hp.encoder_hidden // hp.encoder_head,
             d_model=hp.encoder_hidden,
             d_inner=hp.fft_conv1d_filter_size,
             dropout=hp.encoder_dropout):

    super(Encoder, self).__init__()

    n_position = len_max_seq + 1

    self.src_word_emb = nn.Embedding(
        n_src_vocab, d_word_vec, padding_idx=Constants.PAD)

    self.position_enc = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(n_position, d_word_vec, padding_idx=0),
        freeze=True)

    self.layer_stack = nn.ModuleList([
        FFTBlock(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
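
# Hedged sketch: every constructor in this file assumes a
# get_sinusoid_encoding_table() helper. The version below is the common
# NumPy-based implementation found in FastSpeech-style repos; the exact
# helper used by this project may differ.
import numpy as np
import torch


def get_sinusoid_encoding_table(n_position, d_hid, padding_idx=None):
    """Return an (n_position, d_hid) table of sinusoidal position encodings."""

    def cal_angle(position, hid_idx):
        return position / np.power(10000, 2 * (hid_idx // 2) / d_hid)

    def get_posi_angle_vec(position):
        return [cal_angle(position, hid_j) for hid_j in range(d_hid)]

    sinusoid_table = np.array(
        [get_posi_angle_vec(pos_i) for pos_i in range(n_position)])

    sinusoid_table[:, 0::2] = np.sin(sinusoid_table[:, 0::2])  # even dims: sin
    sinusoid_table[:, 1::2] = np.cos(sinusoid_table[:, 1::2])  # odd dims: cos

    if padding_idx is not None:
        # Zero out the row reserved for padded positions.
        sinusoid_table[padding_idx] = 0.

    return torch.FloatTensor(sinusoid_table)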
def __init__(self,
             py_vocab_size,
             hz_vocab_size=None,
             len_max_seq=hp.max_seq_len,
             d_word_vec=hp.encoder_hidden,
             n_layers=hp.encoder_layer,
             n_head=hp.encoder_head,
             d_k=hp.encoder_hidden // hp.encoder_head,
             d_v=hp.encoder_hidden // hp.encoder_head,
             d_model=hp.encoder_hidden,
             d_inner=hp.fft_conv1d_filter_size,
             dropout=hp.encoder_dropout):

    super(Encoder, self).__init__()

    n_position = len_max_seq + 1

    # Pinyin (phoneme) token embedding.
    self.src_word_emb = nn.Embedding(
        py_vocab_size, d_word_vec, padding_idx=Constants.PAD)

    self.d_word_vec = d_word_vec

    # Optional hanzi (Chinese character) embedding; None when no hanzi
    # vocabulary is given, so the attribute is defined on both branches.
    if hz_vocab_size is not None:
        self.cn_word_emb = nn.Embedding(
            hz_vocab_size, hp.hz_emb_size, padding_idx=Constants.PAD)
    else:
        self.cn_word_emb = None

    self.position_enc = nn.Parameter(
        get_sinusoid_encoding_table(n_position, d_word_vec).unsqueeze(0),
        requires_grad=False)

    self.layer_stack = nn.ModuleList([
        FFTBlock(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
def __init__(self,
             len_max_seq=hp.max_sep_len,
             d_word_vec=hp.word_vec_dim,
             n_layers=hp.decoder_n_layer,
             n_head=hp.decoder_head,
             d_k=64,
             d_v=64,
             d_model=hp.word_vec_dim,
             d_inner=hp.decoder_conv1d_filter_size,
             dropout=hp.dropout):

    super(Decoder, self).__init__()

    n_position = len_max_seq + 1

    # self.src_word_emb = nn.Embedding(
    #     n_src_vocab, d_word_vec, padding_idx=Constants.PAD)
    # self.encoder_prenet = EncoderPreNet()

    self.position_enc = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(n_position, d_word_vec, padding_idx=0),
        freeze=True)

    self.layer_stack = nn.ModuleList([
        FFTBlock(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
def __init__(self,
             n_src_vocab=len(symbols) + 1,
             len_max_seq=hp.max_seq_len,
             d_word_vec=hp.encoder_hidden,
             n_layers=hp.encoder_layer,
             n_head=hp.encoder_head,
             d_k=hp.encoder_hidden // hp.encoder_head,
             d_v=hp.encoder_hidden // hp.encoder_head,
             d_model=hp.encoder_hidden,
             d_inner=hp.fft_conv1d_filter_size,
             dropout=hp.encoder_dropout):

    super(Encoder, self).__init__()

    n_position = len_max_seq + 1

    # <144, 256>; the PAD embedding at index 0 is all zeros.
    self.src_word_emb = nn.Embedding(
        n_src_vocab, d_word_vec, padding_idx=Constants.PAD)

    # <1, 1001, 256>
    self.position_enc = nn.Parameter(
        get_sinusoid_encoding_table(n_position, d_word_vec).unsqueeze(0),
        requires_grad=False)

    self.layer_stack = nn.ModuleList([
        FFTBlock(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
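
# Hedged usage sketch (not part of this file): the nn.Parameter form of
# position_enc is usually consumed in forward() by slicing it to the batch's
# length and adding it to the token embeddings. The argument names
# (src_seq, mask) and the FFTBlock forward signature assumed here are
# illustrative; the project's actual forward() may differ.
def forward(self, src_seq, mask):
    batch_size, max_len = src_seq.shape[0], src_seq.shape[1]

    # Expand the key-padding mask to a (batch, query, key) self-attention mask.
    slf_attn_mask = mask.unsqueeze(1).expand(-1, max_len, -1)

    # Token embedding plus the (non-trainable) sinusoidal position encoding.
    enc_output = self.src_word_emb(src_seq) + \
        self.position_enc[:, :max_len, :].expand(batch_size, -1, -1)

    for enc_layer in self.layer_stack:
        enc_output, _ = enc_layer(
            enc_output, mask=mask, slf_attn_mask=slf_attn_mask)

    return enc_output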
def __init__(self,
             len_max_seq=hp.max_seq_len,
             d_word_vec=hp.encoder_hidden,
             n_layers=hp.decoder_layer,
             n_head=hp.decoder_head,
             d_k=hp.decoder_hidden // hp.decoder_head,
             d_v=hp.decoder_hidden // hp.decoder_head,
             d_model=hp.decoder_hidden,
             d_inner=hp.fft_conv1d_filter_size,
             dropout=hp.decoder_dropout):

    super(Decoder, self).__init__()

    n_position = len_max_seq + 1

    # Projects a 512-dim speaker embedding down to 256 dims.
    self.speaker_fc = nn.Linear(512, 256, bias=False)

    self.position_enc = nn.Parameter(
        get_sinusoid_encoding_table(n_position, d_word_vec).unsqueeze(0),
        requires_grad=False)

    self.layer_stack = nn.ModuleList([
        FFTBlock(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
def __init__(self, speaker_encoder_dim: int = 0):
    super(Decoder, self).__init__()

    # Every decoder width is expanded by the speaker-embedding dimension.
    d_word_vec = hp.encoder_hidden + speaker_encoder_dim
    d_k = (hp.decoder_hidden + speaker_encoder_dim) // hp.decoder_head
    d_v = (hp.decoder_hidden + speaker_encoder_dim) // hp.decoder_head
    d_model = hp.decoder_hidden + speaker_encoder_dim

    n_position = hp.max_seq_len + 1

    self.position_enc = nn.Parameter(
        get_sinusoid_encoding_table(n_position, d_word_vec).unsqueeze(0),
        requires_grad=False)

    self.layer_stack = nn.ModuleList([
        FFTBlock(d_model, hp.fft_conv1d_filter_size, hp.decoder_head,
                 d_k, d_v, dropout=hp.decoder_dropout)
        for _ in range(hp.decoder_layer)])
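
# Hedged sketch (assumption, not taken from this file): the widened d_model
# above suggests the decoder input is the encoder output concatenated with a
# per-utterance speaker embedding along the feature axis. The helper below is
# purely illustrative of that assumed wiring.
def concat_speaker(enc_output, speaker_emb):
    """enc_output: (batch, time, encoder_hidden); speaker_emb: (batch, speaker_encoder_dim)."""
    speaker = speaker_emb.unsqueeze(1).expand(-1, enc_output.size(1), -1)
    return torch.cat([enc_output, speaker], dim=-1)  # (batch, time, d_model)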
def __init__(self,
             len_max_seq=hp.max_seq_len,
             n_layers=hp.decoder_n_layer,
             n_head=hp.decoder_head,
             d_k=hp.decoder_dim // hp.decoder_head,
             d_v=hp.decoder_dim // hp.decoder_head,
             d_model=hp.decoder_dim,
             d_inner=hp.decoder_conv1d_filter_size,
             dropout=hp.dropout):

    super(Decoder, self).__init__()

    n_position = len_max_seq + 1

    self.position_enc = nn.Embedding.from_pretrained(
        get_sinusoid_encoding_table(n_position, d_model, padding_idx=0),
        freeze=True)

    self.layer_stack = nn.ModuleList([
        FFTBlock(d_model, d_inner, n_head, d_k, d_v, dropout=dropout)
        for _ in range(n_layers)])
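
# Hedged smoke test (illustrative only; assumes the get_sinusoid_encoding_table
# sketch above): the nn.Embedding.from_pretrained variants look positions up by
# index, so position ids start at 1 and index 0 stays the all-zero padding row,
# whereas the nn.Parameter variants are simply sliced along the time axis.
if __name__ == "__main__":
    table = get_sinusoid_encoding_table(11, 8, padding_idx=0)

    # Embedding-style lookup: explicit position ids, 0 marks padded steps.
    pos_emb = nn.Embedding.from_pretrained(table, freeze=True)
    pos_ids = torch.tensor([[1, 2, 3, 0, 0]])
    print(pos_emb(pos_ids).shape)      # torch.Size([1, 5, 8])

    # Parameter-style slicing: just take the first max_len rows.
    pos_param = table.unsqueeze(0)     # (1, 11, 8)
    print(pos_param[:, :5, :].shape)   # torch.Size([1, 5, 8])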