Example #1
    def __init__(self,
                 vocab_size,
                 embedding_size=200,
                 embedding_dropout=0,
                 hidden_size=200,
                 dropout=0.0,
                 batch_first=True,
                 max_seq_len=32,
                 load_embedding=None,
                 word2id=None,
                 id2word=None):

        super(RNNLM, self).__init__()

        # define embeddings
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size

        self.load_embedding = load_embedding
        self.word2id = word2id
        self.id2word = id2word

        # define model param
        self.hidden_size = hidden_size

        # define operations
        self.embedding_dropout = nn.Dropout(embedding_dropout)
        self.dropout = nn.Dropout(dropout)

        # load embeddings
        if self.load_embedding:
            embedding_matrix = np.random.rand(self.vocab_size,
                                              self.embedding_size)
            embedding_matrix = torch.FloatTensor(
                load_pretrained_embedding(self.word2id, embedding_matrix,
                                          self.load_embedding))
            self.embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                         freeze=False,
                                                         sparse=False,
                                                         padding_idx=PAD)
        else:
            self.embedder = nn.Embedding(self.vocab_size,
                                         self.embedding_size,
                                         sparse=False,
                                         padding_idx=PAD)

        # define lstm
        # embedding_size -> hidden_size
        self.lstm = torch.nn.LSTM(self.embedding_size,
                                  self.hidden_size,
                                  num_layers=1,
                                  batch_first=batch_first,
                                  bias=True,
                                  dropout=dropout,
                                  bidirectional=False)

        # define ffn
        self.out = nn.Linear(self.hidden_size, self.vocab_size, bias=False)
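
A minimal usage sketch, not part of the source: it assumes RNNLM, its PAD constant, and the load_pretrained_embedding helper are importable from the module above.

    import torch

    model = RNNLM(vocab_size=10000)          # 200-d embeddings, 200-d LSTM by default
    ids = torch.LongTensor([[1, 2, 3]])      # [batch=1, seq_len=3]
    hidden, _ = model.lstm(model.embedder(ids))
    logits = model.out(hidden)               # [1, 3, 10000] vocabulary logits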
Example #2

    def __init__(
            self,
            enc_vocab_size,
            dec_vocab_size,
            share_embedder,
            enc_embedding_size=200,
            dec_embedding_size=200,
            load_embedding_src=None,
            load_embedding_tgt=None,
            max_seq_len_src=32,
            max_seq_len_tgt=300,
            num_heads=8,
            dim_model=512,
            dim_feedforward=1024,
            enc_layers=6,
            dec_layers=6,
            embedding_dropout=0.0,
            dropout=0.2,
            act=False,
            enc_word2id=None,
            enc_id2word=None,
            dec_word2id=None,
            dec_id2word=None,
            transformer_type='standard',
            enc_emb_proj=False,
            dec_emb_proj=False,
            # pyramidal lstm params
            acous_dim=40,
            acous_hidden_size=256,
            # mode to select params to init
            mode='ASR',
            load_mode='ASR'  # useful for storing frozen var
    ):

        super(Seq2seq, self).__init__()
        self.EMB_DYN_AVE_PATH = (
            'models/base/ted-asr-v001/eval_ted_train_STATS/'
            '2020_09_02_04_10_44/dyn_emb_ave.npy')
        self.EMB_DYN_AVE = torch.from_numpy(np.load(self.EMB_DYN_AVE_PATH))

        # define var
        self.enc_vocab_size = enc_vocab_size
        self.dec_vocab_size = dec_vocab_size
        self.enc_embedding_size = enc_embedding_size
        self.dec_embedding_size = dec_embedding_size
        self.load_embedding_src = load_embedding_src
        self.load_embedding_tgt = load_embedding_tgt
        self.max_seq_len_src = max_seq_len_src
        self.max_seq_len_tgt = max_seq_len_tgt
        self.num_heads = num_heads
        self.dim_model = dim_model
        self.dim_feedforward = dim_feedforward

        self.enc_layers = enc_layers
        self.dec_layers = dec_layers

        self.embedding_dropout = nn.Dropout(embedding_dropout)
        self.dropout = nn.Dropout(dropout)
        self.act = act
        self.enc_emb_proj = enc_emb_proj
        self.dec_emb_proj = dec_emb_proj

        self.enc_word2id = enc_word2id
        self.enc_id2word = enc_id2word
        self.dec_word2id = dec_word2id
        self.dec_id2word = dec_id2word
        self.transformer_type = transformer_type
        self.mode = mode
        self.load_mode = load_mode

        # ------------- define embedders -------------
        if self.load_embedding_src:
            embedding_matrix = np.random.rand(self.enc_vocab_size,
                                              self.enc_embedding_size)
            embedding_matrix = torch.FloatTensor(
                load_pretrained_embedding(self.enc_word2id, embedding_matrix,
                                          self.load_embedding_src))
            self.enc_embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                             freeze=False,
                                                             sparse=False,
                                                             padding_idx=PAD)
        else:
            self.enc_embedder = nn.Embedding(self.enc_vocab_size,
                                             self.enc_embedding_size,
                                             sparse=False,
                                             padding_idx=PAD)

        if self.load_embedding_tgt:
            embedding_matrix = np.random.rand(self.dec_vocab_size,
                                              self.dec_embedding_size)
            embedding_matrix = torch.FloatTensor(
                load_pretrained_embedding(self.dec_word2id, embedding_matrix,
                                          self.load_embedding_tgt))
            self.dec_embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                             freeze=False,
                                                             sparse=False,
                                                             padding_idx=PAD)
        else:
            self.dec_embedder = nn.Embedding(self.dec_vocab_size,
                                             self.dec_embedding_size,
                                             sparse=False,
                                             padding_idx=PAD)

        if share_embedder:
            assert enc_vocab_size == dec_vocab_size
            self.enc_embedder = self.dec_embedder

        # always project here: static + dynamic embedding -> hidden
        # (this replaces the boolean enc_emb_proj attribute set above)
        self.enc_emb_proj_flag = True
        self.enc_emb_proj = nn.Linear(
            self.enc_embedding_size + self.dim_model,
            self.dim_model,
            bias=False)

        self.dec_emb_proj_flag = False
        if self.dec_embedding_size != self.dim_model or self.dec_emb_proj:
            self.dec_emb_proj_flag = True
            self.dec_emb_proj = nn.Linear(self.dec_embedding_size,
                                          self.dim_model,
                                          bias=False)  # embedding -> hidden

        # ------------- construct enc, dec  -------------------
        # params
        self.acous_dim = acous_dim
        self.acous_hidden_size = acous_hidden_size
        enc_params = (self.dim_model, self.dim_feedforward, self.num_heads,
                      self.enc_layers, self.act, dropout,
                      self.transformer_type)
        dec_params = (self.dim_model, self.dim_feedforward, self.num_heads,
                      self.dec_layers, self.act, dropout,
                      self.transformer_type)

        # LAS
        comb_mode = '-'.join([self.mode, self.load_mode])
        if 'ASR' in comb_mode or 'ST' in comb_mode:
            self.las = LAS(
                self.enc_vocab_size,
                embedding_size=self.enc_embedding_size,
                acous_dim=self.acous_dim,
                acous_hidden_size=self.acous_hidden_size,
                acous_att_mode='bilinear',
                hidden_size_dec=self.dim_model,
                hidden_size_shared=self.dim_model,
                num_unilstm_dec=3,
                #
                acous_norm=True,
                spec_aug=True,
                batch_norm=False,
                enc_mode='pyramid',
                #
                embedding_dropout=embedding_dropout,
                dropout=dropout,
                residual=True,
                batch_first=True,
                max_seq_len=self.max_seq_len_src,
                embedder=None,  # do not share embedder with text encoder
                word2id=self.enc_word2id,
                id2word=self.enc_id2word,
                hard_att=False)

        # En decode
        if 'AE' in comb_mode:
            self.out_src = self.las.decoder.acous_out  # share the LAS output layer (requires the LAS branch above)

        # En encode
        # De decode
        if 'ST' in comb_mode or 'MT' in comb_mode:
            self.enc_src = Encoder(*enc_params)
            self.dec_tgt = Decoder(*dec_params)
            self.out_tgt = nn.Linear(self.dim_model,
                                     self.dec_vocab_size,
                                     bias=False)
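
Which submodules the mode pair selects follows from the substring tests above; a small, self-contained mirror of that logic:

    for mode, load_mode in [('ASR', 'ASR'), ('ST', 'ASR'), ('MT', 'MT')]:
        comb_mode = '-'.join([mode, load_mode])
        builds_las = 'ASR' in comb_mode or 'ST' in comb_mode
        builds_text = 'ST' in comb_mode or 'MT' in comb_mode
        print(comb_mode, builds_las, builds_text)
    # ASR-ASR: LAS only; ST-ASR: LAS + text enc/dec; MT-MT: text enc/dec only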
Example #3
    def __init__(
            self,
            # params
            vocab_size,
            embedding_size=200,
            acous_hidden_size=256,
            acous_att_mode='bahdanau',
            hidden_size_dec=200,
            hidden_size_shared=200,
            num_unilstm_dec=4,
            #
            add_acous=True,
            acous_norm=False,
            spec_aug=False,
            batch_norm=False,
            enc_mode='pyramid',
            use_type='char',
            #
            add_times=False,
            #
            embedding_dropout=0,
            dropout=0.0,
            residual=True,
            batch_first=True,
            max_seq_len=32,
            load_embedding=None,
            word2id=None,
            id2word=None,
            hard_att=False,
            use_gpu=False):

        super(LAS, self).__init__()
        # config device
        global device
        if use_gpu and torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

        # define model
        self.acous_dim = 40
        self.acous_hidden_size = acous_hidden_size
        self.acous_att_mode = acous_att_mode
        self.hidden_size_dec = hidden_size_dec
        self.hidden_size_shared = hidden_size_shared
        self.num_unilstm_dec = num_unilstm_dec

        # define var
        self.hard_att = hard_att
        self.residual = residual
        self.use_type = use_type
        self.max_seq_len = max_seq_len

        # tuning
        self.add_acous = add_acous
        self.acous_norm = acous_norm
        self.spec_aug = spec_aug
        self.batch_norm = batch_norm
        self.enc_mode = enc_mode

        # add time stamps
        self.add_times = add_times

        # use shared embedding + vocab
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.load_embedding = load_embedding
        self.word2id = word2id
        self.id2word = id2word

        # define operations
        self.embedding_dropout = nn.Dropout(embedding_dropout)
        self.dropout = nn.Dropout(dropout)

        # ------- load embeddings --------
        if self.use_type != 'bpe':
            if self.load_embedding:
                embedding_matrix = np.random.rand(self.vocab_size,
                                                  self.embedding_size)
                embedding_matrix = load_pretrained_embedding(
                    self.word2id, embedding_matrix, self.load_embedding)
                embedding_matrix = torch.FloatTensor(embedding_matrix)
                self.embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                             freeze=False,
                                                             sparse=False,
                                                             padding_idx=PAD)
            else:
                self.embedder = nn.Embedding(self.vocab_size,
                                             self.embedding_size,
                                             sparse=False,
                                             padding_idx=PAD)
        else:
            # BPE
            embedding_matrix = np.random.rand(self.vocab_size,
                                              self.embedding_size)
            embedding_matrix = load_pretrained_embedding_bpe(embedding_matrix)
            embedding_matrix = torch.FloatTensor(embedding_matrix).to(
                device=device)
            self.embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                         freeze=False,
                                                         sparse=False,
                                                         padding_idx=PAD)

        # ------- las model --------
        if self.add_acous and not self.add_times:

            # ------ define acous enc -------
            if self.enc_mode == 'pyramid':
                self.acous_enc_l1 = torch.nn.LSTM(self.acous_dim,
                                                  self.acous_hidden_size,
                                                  num_layers=1,
                                                  batch_first=batch_first,
                                                  bias=True,
                                                  dropout=dropout,
                                                  bidirectional=True)
                self.acous_enc_l2 = torch.nn.LSTM(self.acous_hidden_size * 4,
                                                  self.acous_hidden_size,
                                                  num_layers=1,
                                                  batch_first=batch_first,
                                                  bias=True,
                                                  dropout=dropout,
                                                  bidirectional=True)
                self.acous_enc_l3 = torch.nn.LSTM(self.acous_hidden_size * 4,
                                                  self.acous_hidden_size,
                                                  num_layers=1,
                                                  batch_first=batch_first,
                                                  bias=True,
                                                  dropout=dropout,
                                                  bidirectional=True)
                self.acous_enc_l4 = torch.nn.LSTM(self.acous_hidden_size * 4,
                                                  self.acous_hidden_size,
                                                  num_layers=1,
                                                  batch_first=batch_first,
                                                  bias=True,
                                                  dropout=dropout,
                                                  bidirectional=True)
                if self.batch_norm:
                    self.bn1 = nn.BatchNorm1d(self.acous_hidden_size * 2)
                    self.bn2 = nn.BatchNorm1d(self.acous_hidden_size * 2)
                    self.bn3 = nn.BatchNorm1d(self.acous_hidden_size * 2)
                    self.bn4 = nn.BatchNorm1d(self.acous_hidden_size * 2)

            elif self.enc_mode == 'cnn':
                pass

            # ------ define acous att --------
            dropout_acous_att = dropout
            self.acous_hidden_size_att = 0  # ignored with bilinear

            self.acous_key_size = self.acous_hidden_size * 2  # acous feats
            self.acous_value_size = self.acous_hidden_size * 2  # acous feats
            self.acous_query_size = self.hidden_size_dec  # use dec(words) as query
            self.acous_att = AttentionLayer(
                self.acous_query_size,
                self.acous_key_size,
                value_size=self.acous_value_size,
                mode=self.acous_att_mode,
                dropout=dropout_acous_att,
                query_transform=False,
                output_transform=False,
                hidden_size=self.acous_hidden_size_att,
                use_gpu=use_gpu,
                hard_att=False)

            # ------ define acous out --------
            self.acous_ffn = nn.Linear(self.acous_hidden_size * 2 +
                                       self.hidden_size_dec,
                                       self.hidden_size_shared,
                                       bias=False)
            self.acous_out = nn.Linear(self.hidden_size_shared,
                                       self.vocab_size,
                                       bias=True)

            # ------ define acous dec -------
            # embedding_size_dec + self.hidden_size_shared [200+200] -> hidden_size_dec [200]
            if not self.residual:
                self.dec = torch.nn.LSTM(self.embedding_size +
                                         self.hidden_size_shared,
                                         self.hidden_size_dec,
                                         num_layers=self.num_unilstm_dec,
                                         batch_first=batch_first,
                                         bias=True,
                                         dropout=dropout,
                                         bidirectional=False)
            else:
                self.dec = nn.Module()
                self.dec.add_module(
                    'l0',
                    torch.nn.LSTM(self.embedding_size +
                                  self.hidden_size_shared,
                                  self.hidden_size_dec,
                                  num_layers=1,
                                  batch_first=batch_first,
                                  bias=True,
                                  dropout=dropout,
                                  bidirectional=False))

                for i in range(1, self.num_unilstm_dec):
                    self.dec.add_module(
                        'l' + str(i),
                        torch.nn.LSTM(self.hidden_size_dec,
                                      self.hidden_size_dec,
                                      num_layers=1,
                                      batch_first=batch_first,
                                      bias=True,
                                      dropout=dropout,
                                      bidirectional=False))

        elif self.add_acous and self.add_times:

            # ------ define acous enc -------
            if self.enc_mode == 'ts-pyramid':
                self.acous_enc_l1 = torch.nn.LSTM(self.acous_dim,
                                                  self.acous_hidden_size,
                                                  num_layers=1,
                                                  batch_first=batch_first,
                                                  bias=True,
                                                  dropout=dropout,
                                                  bidirectional=True)
                self.acous_enc_l2 = torch.nn.LSTM(self.acous_hidden_size * 4,
                                                  self.acous_hidden_size,
                                                  num_layers=1,
                                                  batch_first=batch_first,
                                                  bias=True,
                                                  dropout=dropout,
                                                  bidirectional=True)
                self.acous_enc_l3 = torch.nn.LSTM(self.acous_hidden_size * 4,
                                                  self.acous_hidden_size,
                                                  num_layers=1,
                                                  batch_first=batch_first,
                                                  bias=True,
                                                  dropout=dropout,
                                                  bidirectional=True)
                self.acous_enc_l4 = torch.nn.LSTM(self.acous_hidden_size * 4,
                                                  self.acous_hidden_size,
                                                  num_layers=1,
                                                  batch_first=batch_first,
                                                  bias=True,
                                                  dropout=dropout,
                                                  bidirectional=True)

            else:
                # default
                acous_enc_blstm_depth = 1
                self.acous_enc = torch.nn.LSTM(
                    self.acous_dim,
                    self.acous_hidden_size,
                    num_layers=acous_enc_blstm_depth,
                    batch_first=batch_first,
                    bias=True,
                    dropout=dropout,
                    bidirectional=True)

            # ------ define acous local att --------
            dropout_acous_att = dropout
            self.acous_hidden_size_att = 0  # ignored with bilinear

            self.acous_key_size = self.acous_hidden_size * 2  # acous feats
            self.acous_value_size = self.acous_hidden_size * 2  # acous feats
            self.acous_query_size = self.hidden_size_dec  # use dec(words) as query
            self.acous_att = AttentionLayer(
                self.acous_query_size,
                self.acous_key_size,
                value_size=self.acous_value_size,
                mode=self.acous_att_mode,
                dropout=dropout_acous_att,
                query_transform=False,
                output_transform=False,
                hidden_size=self.acous_hidden_size_att,
                use_gpu=use_gpu,
                hard_att=False)

        # ------ define dd classifier -------
        self.dd_blstm_size = 300
        self.dd_blstm_depth = 2

        self.dd_blstm = torch.nn.LSTM(self.embedding_size,
                                      self.dd_blstm_size,
                                      num_layers=self.dd_blstm_depth,
                                      batch_first=batch_first,
                                      bias=True,
                                      dropout=dropout,
                                      bidirectional=True)
        if self.add_acous:
            dd_in_dim = self.dd_blstm_size * 2 + self.acous_hidden_size * 2
        else:
            dd_in_dim = self.dd_blstm_size * 2

        # might need to change this
        self.dd_classify = nn.Sequential(
            nn.Linear(dd_in_dim, 50, bias=True),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(50, 50),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(50, 1),
            nn.Sigmoid(),
        )
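
Why the upper pyramid layers take acous_hidden_size * 4 as input: each BLSTM emits 2 * acous_hidden_size per frame, and the pyramid concatenates every pair of consecutive frames before the next layer, halving the time axis. The forward pass is not shown in the source, so the exact reshape below is an assumption based on the standard pyramidal-LSTM recipe:

    import torch

    B, T, H = 8, 100, 256                   # batch, frames (even), acous_hidden_size
    blstm_out = torch.randn(B, T, 2 * H)    # one bidirectional layer's output
    paired = blstm_out.reshape(B, T // 2, 4 * H)  # concat consecutive frame pairs
    print(paired.shape)                     # torch.Size([8, 50, 1024])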
Example #4
    def __init__(self,
                 enc_vocab_size,
                 dec_vocab_size,
                 share_embedder,
                 enc_embedding_size=200,
                 dec_embedding_size=200,
                 load_embedding_src=None,
                 load_embedding_tgt=None,
                 max_seq_len=32,
                 num_heads=8,
                 dim_model=512,
                 dim_feedforward=1024,
                 enc_layers=6,
                 dec_layers=6,
                 embedding_dropout=0.0,
                 dropout=0.2,
                 act=False,
                 enc_word2id=None,
                 enc_id2word=None,
                 dec_word2id=None,
                 dec_id2word=None,
                 transformer_type='standard'):

        super(Seq2seq, self).__init__()

        # define var
        self.enc_vocab_size = enc_vocab_size
        self.dec_vocab_size = dec_vocab_size
        self.enc_embedding_size = enc_embedding_size
        self.dec_embedding_size = dec_embedding_size
        self.load_embedding_src = load_embedding_src
        self.load_embedding_tgt = load_embedding_tgt
        self.max_seq_len = max_seq_len
        self.num_heads = num_heads
        self.dim_model = dim_model
        self.dim_feedforward = dim_feedforward

        self.enc_layers = enc_layers
        self.dec_layers = dec_layers

        self.embedding_dropout = nn.Dropout(embedding_dropout)
        self.dropout = nn.Dropout(dropout)
        self.act = act

        self.enc_word2id = enc_word2id
        self.enc_id2word = enc_id2word
        self.dec_word2id = dec_word2id
        self.dec_id2word = dec_id2word
        self.transformer_type = transformer_type

        # ------------- define embedder -------------
        if self.load_embedding_src:
            embedding_matrix = np.random.rand(self.enc_vocab_size,
                                              self.enc_embedding_size)
            embedding_matrix = torch.FloatTensor(
                load_pretrained_embedding(self.enc_word2id, embedding_matrix,
                                          self.load_embedding_src))
            self.enc_embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                             freeze=False,
                                                             sparse=False,
                                                             padding_idx=PAD)
        else:
            self.enc_embedder = nn.Embedding(self.enc_vocab_size,
                                             self.enc_embedding_size,
                                             sparse=False,
                                             padding_idx=PAD)

        if self.load_embedding_tgt:
            embedding_matrix = np.random.rand(self.dec_vocab_size,
                                              self.dec_embedding_size)
            embedding_matrix = torch.FloatTensor(
                load_pretrained_embedding(self.dec_word2id, embedding_matrix,
                                          self.load_embedding_tgt))
            self.dec_embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                             freeze=False,
                                                             sparse=False,
                                                             padding_idx=PAD)
        else:
            self.dec_embedder = nn.Embedding(self.dec_vocab_size,
                                             self.dec_embedding_size,
                                             sparse=False,
                                             padding_idx=PAD)

        if share_embedder:
            assert enc_vocab_size == dec_vocab_size
            self.enc_embedder = self.dec_embedder

        self.enc_emb_proj_flag = False
        if self.enc_embedding_size != self.dim_model:
            self.enc_emb_proj = nn.Linear(self.enc_embedding_size,
                                          self.dim_model,
                                          bias=False)  # embedding -> hidden
            self.enc_emb_proj_flag = True
        self.dec_emb_proj_flag = False
        if self.dec_embedding_size != self.dim_model:
            self.dec_emb_proj = nn.Linear(self.dec_embedding_size,
                                          self.dim_model,
                                          bias=False)  # embedding -> hidden
            self.dec_emb_proj_flag = True

        # ------------- construct enc, dec  -------------------
        if self.enc_layers > 0:
            enc_params = (self.dim_model, self.dim_feedforward, self.num_heads,
                          self.enc_layers, self.act, dropout,
                          self.transformer_type)
            self.enc = Encoder(*enc_params)
        if self.dec_layers > 0:
            dec_params = (self.dim_model, self.dim_feedforward, self.num_heads,
                          self.dec_layers, self.act, dropout,
                          self.transformer_type)
            self.dec = Decoder(*dec_params)

        # ------------- define out ffn -------------------
        self.out = nn.Linear(self.dim_model, self.dec_vocab_size, bias=False)
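
A minimal usage sketch, assuming Seq2seq and its Encoder/Decoder dependencies are importable: with the default 200-d embeddings and dim_model=512, both projection flags fire and the enc_emb_proj/dec_emb_proj layers above are created.

    model = Seq2seq(enc_vocab_size=32000, dec_vocab_size=32000,
                    share_embedder=True)    # tied embedder needs equal vocabs
    assert model.enc_emb_proj_flag and model.dec_emb_proj_flag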
Example #5
    def __init__(self,
                 vocab_size_enc,
                 embedding_size_enc=200,
                 embedding_dropout=0,
                 hidden_size_enc=200,
                 num_bilstm_enc=2,
                 num_unilstm_enc=0,
                 dropout=0.0,
                 residual=False,
                 batch_first=True,
                 max_seq_len=32,
                 load_embedding_src=None,
                 src_word2id=None,
                 src_id2word=None):

        super(EncRNN, self).__init__()

        # define embeddings
        self.vocab_size_enc = vocab_size_enc
        self.embedding_size_enc = embedding_size_enc

        self.load_embedding = load_embedding_src
        self.word2id = src_word2id
        self.id2word = src_id2word

        # define model param
        self.hidden_size_enc = hidden_size_enc
        self.num_bilstm_enc = num_bilstm_enc
        self.num_unilstm_enc = num_unilstm_enc
        self.residual = residual

        # define operations
        self.embedding_dropout = nn.Dropout(embedding_dropout)
        self.dropout = nn.Dropout(dropout)

        # load embeddings
        if self.load_embedding:
            embedding_matrix = np.random.rand(self.vocab_size_enc,
                                              self.embedding_size_enc)
            embedding_matrix = torch.FloatTensor(
                load_pretrained_embedding(self.word2id, embedding_matrix,
                                          self.load_embedding))
            self.embedder_enc = nn.Embedding.from_pretrained(embedding_matrix,
                                                             freeze=False,
                                                             sparse=False,
                                                             padding_idx=PAD)
        else:
            self.embedder_enc = nn.Embedding(self.vocab_size_enc,
                                             self.embedding_size_enc,
                                             sparse=False,
                                             padding_idx=PAD)

        # define enc
        # embedding_size_enc -> hidden_size_enc * 2
        self.enc = torch.nn.LSTM(self.embedding_size_enc,
                                 self.hidden_size_enc,
                                 num_layers=self.num_bilstm_enc,
                                 batch_first=batch_first,
                                 bias=True,
                                 dropout=dropout,
                                 bidirectional=True)

        if self.num_unilstm_enc != 0:
            if not self.residual:
                self.enc_uni = torch.nn.LSTM(self.hidden_size_enc * 2,
                                             self.hidden_size_enc * 2,
                                             num_layers=self.num_unilstm_enc,
                                             batch_first=batch_first,
                                             bias=True,
                                             dropout=dropout,
                                             bidirectional=False)
            else:
                self.enc_uni = nn.Module()
                for i in range(self.num_unilstm_enc):
                    self.enc_uni.add_module(
                        'l' + str(i),
                        torch.nn.LSTM(self.hidden_size_enc * 2,
                                      self.hidden_size_enc * 2,
                                      num_layers=1,
                                      batch_first=batch_first,
                                      bias=True,
                                      dropout=dropout,
                                      bidirectional=False))
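
The residual branch registers one single-layer LSTM per level ('l0', 'l1', ...) but no forward() is shown. A hedged sketch of how such a stack is usually unrolled, with the residual addition that the matching 2 * hidden_size_enc widths make possible (an assumption, not the source's confirmed forward):

    def run_residual_stack(stack, x, num_layers):
        # x: [batch, seq_len, hidden_size_enc * 2]
        for i in range(num_layers):
            out, _ = getattr(stack, 'l' + str(i))(x)
            x = x + out   # residual connection between the 1-layer LSTMs
        return x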
Example #6
    def __init__(
            self,
            # add params
            vocab_size_enc,
            vocab_size_dec,
            embedding_size_enc=200,
            embedding_size_dec=200,
            embedding_dropout=0,
            hidden_size_enc=200,
            num_bilstm_enc=2,
            num_unilstm_enc=0,
            hidden_size_dec=200,
            num_unilstm_dec=2,
            hidden_size_att=10,
            hidden_size_shared=200,
            dropout=0.0,
            residual=False,
            batch_first=True,
            max_seq_len=32,
            batch_size=64,
            load_embedding_src=None,
            load_embedding_tgt=None,
            src_word2id=None,
            tgt_word2id=None,
            src_id2word=None,
            att_mode='bahdanau',
            hard_att=False,
            use_gpu=False,
            additional_key_size=0,
            ptr_net=False,
            use_bpe=False):

        super(Seq2Seq, self).__init__()
        # config device
        global device
        if use_gpu and torch.cuda.is_available():
            device = torch.device('cuda')
        else:
            device = torch.device('cpu')

        # define var
        self.hidden_size_enc = hidden_size_enc
        self.num_bilstm_enc = num_bilstm_enc
        self.num_unilstm_enc = num_unilstm_enc
        self.hidden_size_dec = hidden_size_dec
        self.num_unilstm_dec = num_unilstm_dec
        self.hidden_size_att = hidden_size_att
        self.hidden_size_shared = hidden_size_shared
        self.batch_size = batch_size
        self.max_seq_len = max_seq_len
        self.use_gpu = use_gpu
        self.hard_att = hard_att
        self.additional_key_size = additional_key_size
        self.residual = residual
        self.ptr_net = ptr_net
        self.use_bpe = use_bpe

        # use shared embedding + vocab
        self.vocab_size = vocab_size_enc
        self.embedding_size = embedding_size_enc
        self.load_embedding = load_embedding_src
        self.word2id = src_word2id
        self.id2word = src_id2word

        # define operations
        self.embedding_dropout = nn.Dropout(embedding_dropout)
        self.dropout = nn.Dropout(dropout)
        self.beam_width = 0

        # load embeddings
        if not self.use_bpe:
            if self.load_embedding:
                embedding_matrix = np.random.rand(self.vocab_size,
                                                  self.embedding_size)
                embedding_matrix = load_pretrained_embedding(
                    self.word2id, embedding_matrix, self.load_embedding)
                embedding_matrix = torch.FloatTensor(embedding_matrix)
                self.embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                             freeze=False,
                                                             sparse=False,
                                                             padding_idx=PAD)
            else:
                self.embedder = nn.Embedding(self.vocab_size,
                                             self.embedding_size,
                                             sparse=False,
                                             padding_idx=PAD)
        else:
            # BPE
            embedding_matrix = np.random.rand(self.vocab_size,
                                              self.embedding_size)
            embedding_matrix = load_pretrained_embedding_bpe(embedding_matrix)
            embedding_matrix = torch.FloatTensor(embedding_matrix).to(
                device=device)
            self.embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                         freeze=False,
                                                         sparse=False,
                                                         padding_idx=PAD)

        self.embedder_enc = self.embedder
        self.embedder_dec = self.embedder

        # define enc
        # embedding_size_enc -> hidden_size_enc * 2
        self.enc = torch.nn.LSTM(self.embedding_size,
                                 self.hidden_size_enc,
                                 num_layers=self.num_bilstm_enc,
                                 batch_first=batch_first,
                                 bias=True,
                                 dropout=dropout,
                                 bidirectional=True)

        if self.num_unilstm_enc != 0:
            if not self.residual:
                self.enc_uni = torch.nn.LSTM(self.hidden_size_enc * 2,
                                             self.hidden_size_enc * 2,
                                             num_layers=self.num_unilstm_enc,
                                             batch_first=batch_first,
                                             bias=True,
                                             dropout=dropout,
                                             bidirectional=False)
            else:
                self.enc_uni = nn.Module()
                for i in range(self.num_unilstm_enc):
                    self.enc_uni.add_module(
                        'l' + str(i),
                        torch.nn.LSTM(self.hidden_size_enc * 2,
                                      self.hidden_size_enc * 2,
                                      num_layers=1,
                                      batch_first=batch_first,
                                      bias=True,
                                      dropout=dropout,
                                      bidirectional=False))

        # define dec
        # embedding_size_dec + self.hidden_size_shared [200+200] -> hidden_size_dec [200]
        if not self.residual:
            self.dec = torch.nn.LSTM(self.embedding_size +
                                     self.hidden_size_shared,
                                     self.hidden_size_dec,
                                     num_layers=self.num_unilstm_dec,
                                     batch_first=batch_first,
                                     bias=True,
                                     dropout=dropout,
                                     bidirectional=False)
        else:
            lstm_uni_dec_first = torch.nn.LSTM(self.embedding_size +
                                               self.hidden_size_shared,
                                               self.hidden_size_dec,
                                               num_layers=1,
                                               batch_first=batch_first,
                                               bias=True,
                                               dropout=dropout,
                                               bidirectional=False)
            self.dec = nn.Module()
            self.dec.add_module('l0', lstm_uni_dec_first)
            for i in range(1, self.num_unilstm_dec):
                self.dec.add_module(
                    'l' + str(i),
                    torch.nn.LSTM(self.hidden_size_dec,
                                  self.hidden_size_dec,
                                  num_layers=1,
                                  batch_first=batch_first,
                                  bias=True,
                                  dropout=dropout,
                                  bidirectional=False))

        # define att
        # query: hidden_size_dec [200]
        # keys: hidden_size_enc * 2 + (optional) self.additional_key_size [400]
        # values: hidden_size_enc * 2 [400]
        # context: weighted sum of values [400]
        self.key_size = self.hidden_size_enc * 2 + self.additional_key_size
        self.value_size = self.hidden_size_enc * 2
        self.query_size = self.hidden_size_dec
        self.att = AttentionLayer(self.query_size,
                                  self.key_size,
                                  value_size=self.value_size,
                                  mode=att_mode,
                                  dropout=dropout,
                                  query_transform=False,
                                  output_transform=False,
                                  hidden_size=self.hidden_size_att,
                                  use_gpu=self.use_gpu,
                                  hard_att=self.hard_att)

        # define output
        # (hidden_size_enc * 2 + hidden_size_dec) -> self.hidden_size_shared -> vocab_size_dec
        self.ffn = nn.Linear(self.hidden_size_enc * 2 + self.hidden_size_dec,
                             self.hidden_size_shared,
                             bias=False)
        self.out = nn.Linear(self.hidden_size_shared,
                             self.vocab_size,
                             bias=True)

        # define pointer weight
        if self.ptr_net == 'comb':
            self.ptr_i = nn.Linear(self.embedding_size, 1,
                                   bias=False)  #decoder input
            self.ptr_s = nn.Linear(self.hidden_size_dec, 1,
                                   bias=False)  #decoder state
            self.ptr_c = nn.Linear(self.hidden_size_enc * 2, 1,
                                   bias=True)  #context
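
How the three ptr_* layers are typically combined when ptr_net == 'comb': a scalar copy/generate gate from decoder input, decoder state, and attention context. The forward pass is not in the source, so this self-contained sketch follows the standard pointer-generator recipe as an assumption:

    import torch
    import torch.nn as nn

    ptr_i = nn.Linear(200, 1, bias=False)   # decoder input embedding
    ptr_s = nn.Linear(200, 1, bias=False)   # decoder state
    ptr_c = nn.Linear(400, 1, bias=True)    # attention context
    x_t, s_t, c_t = torch.randn(4, 200), torch.randn(4, 200), torch.randn(4, 400)
    p_gen = torch.sigmoid(ptr_i(x_t) + ptr_s(s_t) + ptr_c(c_t))  # [4, 1] gate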
Example #7
	def __init__(self,
		vocab_size_dec,
		embedding_size_dec=200,
		embedding_dropout=0,
		hidden_size_enc=200,
		hidden_size_dec=200,
		num_unilstm_dec=2,
		att_mode='bahdanau',
		hidden_size_att=10,
		hidden_size_shared=200,
		dropout=0.0,
		residual=False,
		batch_first=True,
		max_seq_len=32,
		load_embedding_tgt=None,
		tgt_word2id=None,
		tgt_id2word=None
		):

		super(DecRNN, self).__init__()

		# define embeddings
		self.vocab_size_dec = vocab_size_dec
		self.embedding_size_dec = embedding_size_dec

		self.load_embedding = load_embedding_tgt
		self.word2id = tgt_word2id
		self.id2word = tgt_id2word

		# define model params
		self.hidden_size_enc = hidden_size_enc
		self.hidden_size_dec = hidden_size_dec
		self.num_unilstm_dec = num_unilstm_dec
		self.hidden_size_att = hidden_size_att
		self.hidden_size_shared = hidden_size_shared # [200]
		self.max_seq_len = max_seq_len
		self.residual = residual

		# define operations
		self.embedding_dropout = nn.Dropout(embedding_dropout)
		self.dropout = nn.Dropout(dropout)

		# load embeddings
		if self.load_embedding:
			embedding_matrix = np.random.rand(self.vocab_size_dec, self.embedding_size_dec)
			embedding_matrix = torch.FloatTensor(load_pretrained_embedding(
				self.word2id, embedding_matrix, self.load_embedding))
			self.embedder_dec = nn.Embedding.from_pretrained(embedding_matrix,
				freeze=False, sparse=False, padding_idx=PAD)
		else:
			self.embedder_dec = nn.Embedding(self.vocab_size_dec,
				self.embedding_size_dec, sparse=False, padding_idx=PAD)

		# define dec
		# embedding_size_dec + self.hidden_size_shared [200+200] -> hidden_size_dec [200]
		if not self.residual:
			self.dec = torch.nn.LSTM(
				self.embedding_size_dec + self.hidden_size_shared,
				self.hidden_size_dec,
				num_layers=self.num_unilstm_dec, batch_first=batch_first,
				bias=True, dropout=dropout, bidirectional=False
			)
		else:
			lstm_uni_dec_first = torch.nn.LSTM(
				self.embedding_size_dec + self.hidden_size_shared,
				self.hidden_size_dec,
				num_layers=1, batch_first=batch_first,
				bias=True, dropout=dropout, bidirectional=False
			)
			self.dec = nn.Module()
			self.dec.add_module('l0', lstm_uni_dec_first)
			for i in range(1, self.num_unilstm_dec):
				self.dec.add_module(
					'l'+str(i),
					torch.nn.LSTM(self.hidden_size_dec, self.hidden_size_dec,
						num_layers=1, batch_first=batch_first, bias=True,
						dropout=dropout, bidirectional=False
					)
				)

		# define att
		# query: 	hidden_size_dec [200]
		# keys: 	hidden_size_enc * 2 [400]
		# values: 	hidden_size_enc * 2 [400]
		# context: 	weighted sum of values [400]
		self.key_size = self.hidden_size_enc * 2
		self.value_size = self.hidden_size_enc * 2
		self.query_size = self.hidden_size_dec
		self.att = AttentionLayer(
			self.query_size, self.key_size, value_size=self.value_size,
			mode=att_mode, dropout=dropout,
			query_transform=False, output_transform=False,
			hidden_size=self.hidden_size_att, hard_att=False)

		# define output
		# (hidden_size_enc * 2 + hidden_size_dec)
		# -> self.hidden_size_shared -> vocab_size_dec
		self.ffn = nn.Linear(self.hidden_size_enc * 2 + self.hidden_size_dec,
			self.hidden_size_shared, bias=False)
		self.out = nn.Linear(self.hidden_size_shared, self.vocab_size_dec, bias=True)
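
A self-contained shape walkthrough of the output path above: the 400-d attention context and the 200-d decoder state are fused down to hidden_size_shared before the vocabulary projection.

    import torch
    import torch.nn as nn

    B, V = 4, 30000
    ffn = nn.Linear(400 + 200, 200, bias=False)   # context + state -> shared
    out = nn.Linear(200, V, bias=True)            # shared -> vocab
    context = torch.randn(B, 400)                 # weighted sum of encoder values
    state = torch.randn(B, 200)                   # decoder hidden state
    logits = out(ffn(torch.cat([context, state], dim=-1)))  # [4, 30000]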
Example #8
File: Dec.py Project: EdieLu/LAS
    def __init__(
            self,
            # params
            vocab_size,
            embedding_size=200,
            acous_hidden_size=256,
            acous_att_mode='bahdanau',
            hidden_size_dec=200,
            hidden_size_shared=200,
            num_unilstm_dec=4,
            use_type='char',
            #
            embedding_dropout=0,
            dropout=0.0,
            residual=True,
            batch_first=True,
            max_seq_len=32,
            load_embedding=None,
            word2id=None,
            id2word=None,
            hard_att=False,
            use_gpu=False):

        super(Dec, self).__init__()
        device = check_device(use_gpu)

        # define model
        self.acous_hidden_size = acous_hidden_size
        self.acous_att_mode = acous_att_mode
        self.hidden_size_dec = hidden_size_dec
        self.hidden_size_shared = hidden_size_shared
        self.num_unilstm_dec = num_unilstm_dec

        # define var
        self.hard_att = hard_att
        self.residual = residual
        self.max_seq_len = max_seq_len
        self.use_type = use_type

        # use shared embedding + vocab
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.load_embedding = load_embedding
        self.word2id = word2id
        self.id2word = id2word

        # define operations
        self.embedding_dropout = nn.Dropout(embedding_dropout)
        self.dropout = nn.Dropout(dropout)

        # ------- load embeddings --------
        if self.load_embedding:
            embedding_matrix = np.random.rand(self.vocab_size,
                                              self.embedding_size)
            embedding_matrix = load_pretrained_embedding(
                self.word2id, embedding_matrix, self.load_embedding)
            embedding_matrix = torch.FloatTensor(embedding_matrix)
            self.embedder = nn.Embedding.from_pretrained(embedding_matrix,
                                                         freeze=False,
                                                         sparse=False,
                                                         padding_idx=PAD)
        else:
            self.embedder = nn.Embedding(self.vocab_size,
                                         self.embedding_size,
                                         sparse=False,
                                         padding_idx=PAD)

        # ------ define acous att --------
        dropout_acous_att = dropout
        self.acous_hidden_size_att = 0  # ignored with bilinear

        self.acous_key_size = self.acous_hidden_size * 2  # acous feats
        self.acous_value_size = self.acous_hidden_size * 2  # acous feats
        self.acous_query_size = self.hidden_size_dec  # use dec(words) as query
        self.acous_att = AttentionLayer(self.acous_query_size,
                                        self.acous_key_size,
                                        value_size=self.acous_value_size,
                                        mode=self.acous_att_mode,
                                        dropout=dropout_acous_att,
                                        query_transform=False,
                                        output_transform=False,
                                        hidden_size=self.acous_hidden_size_att,
                                        use_gpu=use_gpu,
                                        hard_att=False)

        # ------ define acous out --------
        self.acous_ffn = nn.Linear(self.acous_hidden_size * 2 +
                                   self.hidden_size_dec,
                                   self.hidden_size_shared,
                                   bias=False)
        self.acous_out = nn.Linear(self.hidden_size_shared,
                                   self.vocab_size,
                                   bias=True)

        # ------ define acous dec -------
        # embedding_size_dec + self.hidden_size_shared [200+200]-> hidden_size_dec [200]
        if not self.residual:
            self.dec = torch.nn.LSTM(self.embedding_size +
                                     self.hidden_size_shared,
                                     self.hidden_size_dec,
                                     num_layers=self.num_unilstm_dec,
                                     batch_first=batch_first,
                                     bias=True,
                                     dropout=dropout,
                                     bidirectional=False)
        else:
            self.dec = nn.Module()
            self.dec.add_module(
                'l0',
                torch.nn.LSTM(self.embedding_size + self.hidden_size_shared,
                              self.hidden_size_dec,
                              num_layers=1,
                              batch_first=batch_first,
                              bias=True,
                              dropout=dropout,
                              bidirectional=False))

            for i in range(1, self.num_unilstm_dec):
                self.dec.add_module(
                    'l' + str(i),
                    torch.nn.LSTM(self.hidden_size_dec,
                                  self.hidden_size_dec,
                                  num_layers=1,
                                  batch_first=batch_first,
                                  bias=True,
                                  dropout=dropout,
                                  bidirectional=False))
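
Why the l0 decoder LSTM takes embedding_size + hidden_size_shared as input: at each step the previous token's embedding is concatenated with the previous step's shared attention output. The step loop is not in the source, so this input-assembly sketch follows the usual LAS recipe as an assumption:

    import torch

    B, E, Hsh = 4, 200, 200
    prev_emb = torch.randn(B, 1, E)      # embedded previous output token
    att_out = torch.randn(B, 1, Hsh)     # shared attention output, last step
    dec_in = torch.cat([prev_emb, att_out], dim=-1)  # [4, 1, 400] -> dec 'l0'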