def __init__(self,
                 d_word_vec=77,
                 n_layers=3,
                 n_head=1,
                 d_k=16,
                 d_v=16,
                 d_model=77,
                 d_inner=16,
                 dropout=0.1,
                 n_position=200,
                 seq_len=15,
                 con_size=3,
                 days=1,
                 kernel='linear',
                 kernel_size_tcn=3,
                 kernel_size_scn=2):

        super().__init__()
        self.encoder = Encoder(d_word_vec,
                               n_layers,
                               n_head,
                               d_k,
                               d_v,
                               d_model,
                               d_inner,
                               dropout,
                               n_position,
                               kernel=kernel,
                               kernel_size_tcn=kernel_size_tcn,
                               kernel_size_scn=kernel_size_scn)
        # Conv1d with stride 1 and no padding shortens the length dimension to
        # seq_len - con_size + 1, which matches the input size of ff1 below.
        self.con1 = nn.Conv1d(d_model, days, con_size)
        self.ff1 = nn.Linear(seq_len - con_size + 1, d_word_vec)
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
# Example 2
    def __init__(self,
                 n_src_vocab,
                 n_max_seq,
                 n_layers=2,
                 n_head=2,
                 d_word_vec=100,
                 d_model=100,
                 d_inner_hid=100,
                 d_k=100,
                 d_v=100,
                 dropout=0.1,
                 proj_share_weight=True):

        super(Decepticon, self).__init__()
        self.encoder = Encoder(n_src_vocab,
                               n_max_seq,
                               n_layers=n_layers,
                               n_head=n_head,
                               d_word_vec=d_word_vec,
                               d_model=d_model,
                               d_inner_hid=d_inner_hid,
                               dropout=dropout)

        assert d_model == d_word_vec, 'To facilitate the residual connections, ' \
            'the dimensions of all module outputs must be the same.'
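# A minimal, self-contained sketch (assumed shapes, independent of the model
# above) of why the assertion requires d_model == d_word_vec: the residual
# connection adds each sub-layer's input to its output element-wise, which is
# only defined when both tensors share the last dimension.
import torch

x = torch.randn(2, 10, 100)    # (batch, seq_len, d_word_vec)
y = torch.randn(2, 10, 100)    # sub-layer output, last dim = d_model
residual = x + y               # valid only because d_model == d_word_vec == 100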
    def __init__(self, in_channels=13, len_max_seq=100,
            d_word_vec=512, d_model=512, d_inner=2048,
            n_layers=6, n_head=8, d_k=64, d_v=64,
            dropout=0.2, nclasses=6):

        super(TransformerEncoder, self).__init__()

        self.d_model = d_model

        self.inlayernorm = nn.LayerNorm(in_channels)
        self.convlayernorm = nn.LayerNorm(d_model)
        self.outlayernorm = nn.LayerNorm(d_model)

        self.inconv = torch.nn.Conv1d(in_channels, d_model, 1)

        self.encoder = Encoder(
            n_src_vocab=None, len_max_seq=len_max_seq,
            d_word_vec=d_word_vec, d_model=d_model, d_inner=d_inner,
            n_layers=n_layers, n_head=n_head, d_k=d_k, d_v=d_v,
            dropout=dropout)

        self.outlinear = nn.Linear(d_model, nclasses, bias=False)

        self.tempmaxpool = nn.MaxPool1d(len_max_seq)

        self.logsoftmax = nn.LogSoftmax(dim=-1)
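# A minimal sketch (assumed shapes, independent of the class above) of the
# input projection and temporal pooling used here: a kernel-size-1 Conv1d
# lifts the raw channel dimension (in_channels) to the transformer width
# d_model, and the max-pool collapses the sequence to one vector per sample.
import torch
import torch.nn as nn

x = torch.randn(4, 13, 100)                   # (batch, in_channels, seq_len)
proj = nn.Conv1d(13, 512, 1)(x)               # -> (4, 512, 100)
pooled = nn.MaxPool1d(100)(proj)              # -> (4, 512, 1)
logits = nn.Linear(512, 6, bias=False)(pooled.squeeze(-1))  # -> (4, 6)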
# Example 4
 def __init__(self, use_postnet=True, n_spkers=1, n_emotes=1):
     super(FastSpeech2, self).__init__()
     
     ### Speaker Embedding Table ###
     self.use_spk_embed = hp.use_spk_embed
     if self.use_spk_embed:
         self.n_spkers = n_spkers
         self.spk_embed_dim = hp.spk_embed_dim
         self.spk_embed_weight_std = hp.spk_embed_weight_std
         self.embed_speakers = Embedding(n_spkers, self.spk_embed_dim, padding_idx=None, std=self.spk_embed_weight_std)
         
     self.use_emo_embed = hp.use_emo_embed
     if self.use_emo_embed:
         self.n_emotes = n_emotes
         self.emo_embed_dim = hp.emo_embed_dim
         self.emo_embed_weight_std = hp.emo_embed_weight_std
         self.embed_emotions = Embedding(n_emotes, self.emo_embed_dim, padding_idx=None, std=self.emo_embed_weight_std)
     
     ### Encoder, Speaker Integrator, Variance Adaptor, Decoder, Postnet ###
     self.encoder = Encoder()
     if self.use_spk_embed:
         self.speaker_integrator = SpeakerIntegrator()
     self.variance_adaptor = VarianceAdaptor()
     self.decoder = Decoder()
     self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
     self.use_postnet = use_postnet
     if self.use_postnet:
         self.postnet = PostNet()
# Example 5
    def __init__(self):
        super(FastSpeech, self).__init__()

        self.encoder = Encoder()
        self.length_regulator = LengthRegulator()
        self.decoder = Decoder()

        self.mel_linear = Linear(hp.decoder_output_size, hp.num_mels)
        self.postnet = PostNet()
    def __init__(self):
        super(FastSpeech2, self).__init__()

        self.encoder = Encoder()
        self.variance_adaptor = VarianceAdaptor()
        self.decoder = Decoder()

        self.mel_linear = Linear(hp.decoder_hidden, hp.n_mel_channels)
        self.postnet = PostNet()
# Example 7
    def __init__(self):
        super(FastSpeech, self).__init__()

        self.encoder = Encoder()
        self.length_regulator = LengthRegulator()
        self.decoder = Decoder()

        self.mel_linear = Linear(hp.decoder_dim, hp.num_mels)
        self.postnet = CBHG(hp.num_mels, K=8, projections=[256, hp.num_mels])
        self.last_linear = Linear(hp.num_mels * 2, hp.num_mels)
# Example 8
 def __init__(self):
     super(StyleEncoder, self).__init__()
     self.text_encoder = Encoder()
     self.audio_encoder = AudioEncoder()
     self.text_linear_down = nn.Sequential(
         nn.Linear(hp.encoder_hidden, hp.va_neck_hidden_t), nn.ReLU())
     self.speaker_linear_p = nn.Sequential(
         nn.Linear(hp.speaker_embed_dim, hp.va_neck_hidden_p * 2),
         nn.ReLU())
     self.speaker_linear = nn.Sequential(
         nn.Linear(hp.speaker_embed_dim, hp.encoder_hidden), nn.ReLU())
    def __init__(self, py_vocab_size, hz_vocab_size=None, use_postnet=True):
        super(FastSpeech2, self).__init__()

        self.encoder = Encoder(py_vocab_size, hz_vocab_size=hz_vocab_size)
        self.variance_adaptor = VarianceAdaptor()

        self.decoder = Decoder()
        self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)

        self.use_postnet = use_postnet
        if self.use_postnet:
            self.postnet = UNet(scale=8)
    def __init__(self, use_postnet=True):
        super(FastSpeech2, self).__init__()

        self.encoder = Encoder()
        self.variance_adaptor = TacotronDuration()

        self.decoder = Decoder()
        self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)

        self.use_postnet = use_postnet
        if self.use_postnet:
            self.postnet = PostNet()
# Example 11
    def __init__(self, use_postnet=True):
        super(FastSpeech2, self).__init__()
        
#         self.gst = GST()
        self.encoder = Encoder()
        self.variance_adaptor = VarianceAdaptor()

        self.decoder = Decoder()
        
        if hp.vocoder == 'WORLD':
#             self.f0_decoder= Decoder()
            self.ap_linear = nn.Linear(hp.decoder_hidden, hp.n_ap_channels)
            self.sp_linear = nn.Linear(hp.decoder_hidden, hp.n_sp_channels)
        else:
            self.mel_linear = nn.Linear(hp.decoder_hidden, hp.n_mel_channels)
        
        self.use_postnet = use_postnet
        if self.use_postnet:
            self.postnet = PostNet()
# Example 12
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams
        hparams = self.hparams  # if hparams was passed as a dict, self.hparams may expose it with attribute access (a['key'] -> a.key)

        self.enc = Encoder(
            n_src_vocab=hparams.vocab_size, len_max_seq=hparams.max_len,
            d_word_vec=hparams.d_model, d_model=hparams.d_model,
            d_inner=hparams.d_inner_hid, d_k=hparams.d_k, d_v=hparams.d_v,
            n_layers=hparams.n_layers, n_head=hparams.n_head,
            dropout=hparams.dropout)

        self.word = nn.Linear(hparams.d_model, hparams.vocab_size, bias=False)
        nn.init.xavier_normal_(self.word.weight)
        self.x_logit_scale = 1.
        if hparams.share_emb_prj_weight:
            self.word.weight = self.enc.src_word_emb.weight
            self.x_logit_scale = (hparams.d_model ** -0.5)

        self.loc = nn.Linear(hparams.d_model, 1)
# Example 13
    def __init__(self,
                 src_vocab_size,
                 tgt_vocab_size,
                 n_layer=6,
                 d_model=512,
                 d_ff=2048,
                 n_head=8,
                 dropout=0.1):
        super(Transformer, self).__init__()
        self.src_embed = nn.Sequential(Embeddings(d_model, src_vocab_size),
                                       PositionalEncoding(d_model, dropout))
        self.tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab_size),
                                       PositionalEncoding(d_model, dropout))
        self.encoder = Encoder(n_head, d_model, d_ff, dropout, n_layer)
        self.decoder = Decoder(n_head, d_model, d_ff, dropout, n_layer)
        self.generator = Generator(d_model, tgt_vocab_size)

        # Initialize parameters with Glorot / fan_avg.
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
    def __init__(self, d_src_vec, len_seq, d_emb_vec, n_layers, n_head, d_k,
                 d_v, d_inner, dropout):

        super(model, self).__init__()
        self.d_src_vec = d_src_vec
        self.d_emb_vec = d_emb_vec
        self.len_seq = len_seq
        self.n_layers = n_layers
        self.n_head = n_head
        self.dropout = dropout
        self.d_inner = d_inner

        self.ffn = ffn_compressed(d_in=self.d_src_vec,
                                  d_hid=self.d_inner,
                                  d_out=self.d_emb_vec)

        self.encoder = Encoder(len_seq=self.len_seq,
                               d_word_vec=self.d_emb_vec,
                               n_layers=self.n_layers,
                               n_head=self.n_head,
                               d_k=self.d_emb_vec // self.n_head,
                               d_v=self.d_emb_vec // self.n_head,
                               d_inner=self.d_inner,
                               dropout=self.dropout)

        #Fully connected. Seems to have a lot of params
        #        self.FC1 = nn.Linear(self.d_emb_vec * self.len_seq , 64)
        #        self.FC2 = nn.Linear(64, 8)
        #        self.FC3 = nn.Linear(8, 2)

        #        #Average pooling over features
        #        self.avg_pooling = nn.AvgPool1d(d_emb_vec-1, stride=1)  #d_emb_vec-1: To have 2 classes
        #        self.FC = nn.Linear(len_seq * 2, 2)  #2: binary classification
        #        self.softmax = nn.Softmax(dim=-1)

        # Average pooling over the sequence: kernel size len_seq collapses the
        # sequence to a single averaged token.
        self.avg_pooling = nn.AvgPool1d(len_seq, stride=1)
        self.FC = nn.Linear(d_emb_vec, 2)  # 2 outputs: binary classification
        self.softmax = nn.Softmax(dim=-1)
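# A minimal sketch (assumed shapes, independent of the model above) of the
# pooling used for classification: AvgPool1d with kernel size len_seq
# collapses the sequence dimension to a single averaged token, which then
# feeds the binary classifier.
import torch
import torch.nn as nn

len_seq, d_emb_vec = 30, 64                           # hypothetical sizes
feats = torch.randn(8, d_emb_vec, len_seq)            # (batch, channels, seq)
pooled = nn.AvgPool1d(len_seq, stride=1)(feats)       # -> (8, d_emb_vec, 1)
logits = nn.Linear(d_emb_vec, 2)(pooled.squeeze(-1))  # -> (8, 2)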
# Example 15
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchtext.data import Field, Dataset, BucketIterator
from torchtext.datasets import TranslationDataset

import transformer.Constants as Constants
from transformer.Layers import EncoderLayer
from transformer.Models import Transformer, Encoder
from transformer.Optim import ScheduledOptim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

encoder_stacks = Encoder(d_model=32,
                         d_inner=64,
                         n_layers=2,
                         n_head=4,
                         d_k=16,
                         d_v=16,
                         dropout=0.1)

criterion = torch.nn.MSELoss().to(device)
optimizer = torch.optim.SGD(encoder_stacks.parameters(), lr=1)

src = torch.rand(1, 2, 32, requires_grad=True)
tgt = torch.rand(1, 2, 32)

print(src)

encoder_stacks.train()

for i in range(100):
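    # Minimal sketch of a training step; the example is truncated here, so the
    # loop body below is an assumption. In particular, the call signature of
    # this modified Encoder (taking the source tensor directly and returning
    # the encoded sequence, possibly alongside attention weights) is assumed,
    # not taken from the canonical transformer.Models.Encoder API.
    optimizer.zero_grad()
    enc_output = encoder_stacks(src)
    if isinstance(enc_output, tuple):   # some variants also return attentions
        enc_output = enc_output[0]
    loss = criterion(enc_output, tgt)
    loss.backward()
    optimizer.step()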
# Example 16
    def __init__(self,
                 source_dataset,
                 batch_size,
                 epochs,
                 window_size,
                 device,
                 plot_file,
                 train_data,
                 test_data,
                 valid_data,
                 target_column,
                 target_min,
                 target_max,
                 d_inner,
                 n_layers,
                 n_head_,
                 d_k,
                 d_v,
                 n_warmup_steps,
                 criterion,
                 target_name,
                 d_model,
                 model_file=None,
                 load_data=False,
                 load_model=False):
        self.data_frame = self.read_dataset(source_dataset)
        self.batch_size = batch_size
        self.epochs = epochs
        self.device = device
        self.target_column = target_column
        self.window = window_size
        self.plot_file = plot_file
        self.n_layers = n_layers
        self.n_head = n_head_
        self.d_inner = d_inner
        self.warmup_step = n_warmup_steps
        self.d_k = d_k
        self.d_v = d_v
        self.d_model = d_model
        self.target_name = target_name
        self.input_mask = torch.ones([self.batch_size, 1, self.window],
                                     dtype=torch.int,
                                     device=device)
        self.target_max = target_max
        self.target_min = target_min
        self.model_file = model_file
        self.prev_epoch = 0
        if load_data:
            self.train_df = pd.read_csv(train_data)
            self.test_df = pd.read_csv(test_data)
            self.valid_df = pd.read_csv(valid_data)
        else:
            self.train_df, self.valid_df, self.test_df = self.organize_dataset(
                train_data, test_data, valid_data)

        # Pad the dataframes with zero-valued columns so the feature width
        # reaches d_model, the input width given to the encoder below.
        pad_col = [
            'col' + str(i) for i in range(self.train_df.shape[1], self.d_model)
        ]
        for col in pad_col:
            self.train_df[col] = 0
            self.test_df[col] = 0
            self.valid_df[col] = 0
        self.columns = self.train_df.shape[1]
        self.model = Encoder(n_position=200,
                             d_word_vec=self.columns,
                             d_model=self.columns,
                             d_inner=d_inner,
                             n_layers=n_layers,
                             n_head=n_head_,
                             d_k=d_k,
                             d_v=d_v,
                             dropout=0).to(device)

        if load_model:
            self.model = torch.load(self.model_file)['model']
            self.model.eval()
            self.model = self.model.to(device)
            self.prev_epoch = torch.load(self.model_file)['epoch']

        self.criterion = criterion
        self.optimizer = ScheduledOptim(
            optim.Adam(self.model.parameters(), betas=(0.9, 0.98), eps=1e-09),
            2.0,
            self.columns,
            n_warmup_steps,
            n_step=self.prev_epoch * (math.floor(
                self.train_df.shape[0] / self.window * self.batch_size)))
        self.loss_list = []
        self.lr_list = []
# Example 17
            return mel_output, mel_output_postnet, duration_predictor_output
        else:
            length_regulator_output, decoder_pos = self.length_regulator(
                encoder_output, encoder_mask, alpha=alpha)

            decoder_output = self.decoder(length_regulator_output, decoder_pos)

            mel_output = self.mel_linear(decoder_output)
            mel_output_postnet = self.postnet(mel_output) + mel_output

            return mel_output, mel_output_postnet


if __name__ == "__main__":
    # Test
    test_encoder = Encoder()
    test_decoder = Decoder()
    # print(test_encoder)
    # print(test_decoder)

    test_src = torch.stack([
        torch.Tensor([1, 2, 4, 3, 2, 5, 0, 0]),
        torch.Tensor([3, 4, 2, 6, 7, 1, 2, 3])
    ]).long()
    test_pos = torch.stack([
        torch.Tensor([1, 2, 3, 4, 5, 6, 0, 0]),
        torch.Tensor([1, 2, 3, 4, 5, 6, 7, 8])
    ]).long()
    test_target = torch.stack([
        torch.Tensor([0, 2, 3, 0, 3, 2, 1, 0]),
        torch.Tensor([1, 2, 3, 2, 2, 0, 3, 6])