Example #1
    def __init__(self, model_name, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF2, self).__init__()
        self.hidden_size = hidden_size * 2  # with the char embedding added, the hidden size is doubled.

        self.emb = layers.Embedding(model_name=model_name,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        # input_size=self.hidden_size + 2 because we add two extra features (avg_attention) to both the
        # char embedding and the word embedding to boost performance. avg_attention uses an attention
        # mechanism so that the model itself learns a weighted average over the vectors.
        self.enc = layers.RNNEncoder(input_size=self.hidden_size + 2,
                                     hidden_size=self.hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.highway = layers.HighwayEncoder(2, 4 * hidden_size)

        self.mod = layers.RNNEncoder(input_size=2 * self.hidden_size,
                                     hidden_size=self.hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        #         self.sim = nn.CosineSimilarity(dim=1, eps=1e-6)

        self.qa_outputs = nn.Linear(2 * self.hidden_size, 2)
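The avg_attention feature referenced in the comment above is not defined in this snippet. Below is a minimal sketch of how such a learned weighted average could yield one extra scalar feature per token; the class name AvgAttention and its internals are illustrative assumptions, not the original layers implementation.

import torch
import torch.nn as nn
import torch.nn.functional as F

class AvgAttention(nn.Module):
    # Hypothetical sketch: learn a weighted average over the embedding
    # dimensions of each token and return it as one extra feature.
    def __init__(self, emb_dim):
        super(AvgAttention, self).__init__()
        self.score = nn.Linear(emb_dim, emb_dim, bias=False)

    def forward(self, emb):                                # emb: (batch, seq_len, emb_dim)
        weights = F.softmax(self.score(emb), dim=-1)       # attention weights over embedding dims
        return (weights * emb).sum(dim=-1, keepdim=True)   # (batch, seq_len, 1)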
Example #2
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF_charCNN, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)
        
        self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                             hidden_size=hidden_size,
                                             drop_prob=drop_prob)
        
        self.hwy = layers.HighwayEncoder(2, 2*hidden_size)

        self.enc = layers.RNNEncoder(input_size=2*hidden_size,
                                     hidden_size=2*hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * 2*hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * 2*hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
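For orientation, the shape bookkeeping implied by the sizes above, assuming the usual CS224n-style layers in which a bidirectional RNNEncoder doubles its hidden size and BiDAFAttention concatenates four views of its input (hidden_size=100 is an arbitrary placeholder):

hidden_size = 100
emb_out = 2 * hidden_size        # word + char embeddings after the highway: 200
enc_out = 2 * (2 * hidden_size)  # bidirectional encoder run with hidden_size=2*hidden_size: 400
att_out = 4 * enc_out            # BiDAF attention output: 1600 == 8 * 2 * hidden_size, the mod input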
Example #3
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0., twist_embeddings=False):
        super(BiDAF_charCNN_BERTEnc_BERTMod, self).__init__()
        
        ### Optionally interleave word- and char-embedding channels:
        ### idx_twist = [0, H, 1, H+1, ..., H-1, 2H-1] for hidden_size H.
        self.twist_embeddings = twist_embeddings
        idx_list = []
        for i in range(hidden_size):
            idx_list.append(i)
            idx_list.append(hidden_size + i)
        self.register_buffer('idx_twist', torch.tensor(idx_list))
        ###
        
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)
        
        self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                             hidden_size=hidden_size,
                                             drop_prob=drop_prob)
        
        self.hwy = layers.HighwayEncoder(2, 2*hidden_size)

        self.enc = bert_layers.BertEncoder(n_layers=3, #n_layers=4,
                                           d_feature=2*hidden_size, 
                                           n_heads=8,
                                           out_size=2*hidden_size,
                                           #d_ff=2048,
                                           d_ff = 2*hidden_size, 
                                           dropout_prob=0.1,
                                           #dropout_prob=drop_prob,
                                           ff_activation=F.relu)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)
        
        self.mod = bert_layers.BertEncoder(n_layers=3, #n_layers=3,
                                           d_feature=8*hidden_size, 
                                           n_heads=8,
                                           out_size=2*hidden_size,
                                           #d_ff=2048,
                                           d_ff = 2*hidden_size, 
                                           dropout_prob=0.1,
                                           #dropout_prob=drop_prob,
                                           ff_activation=F.relu)

        # self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
        #                              hidden_size=hidden_size,
        #                              num_layers=2,
        #                              drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
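A minimal illustration of what the idx_twist buffer from this example does, assuming the forward pass applies it with index_select on the concatenated word/char embeddings (sizes below are placeholders):

import torch

hidden_size = 4
word_emb = torch.arange(0, hidden_size).float().view(1, 1, -1)        # pretend word channels w0..w3
char_emb = torch.arange(10, 10 + hidden_size).float().view(1, 1, -1)  # pretend char channels c0..c3
combined = torch.cat([word_emb, char_emb], dim=-1)                    # [w0, w1, w2, w3, c0, c1, c2, c3]

idx_twist = torch.tensor([i for pair in zip(range(hidden_size),
                                            range(hidden_size, 2 * hidden_size))
                          for i in pair])
print(combined.index_select(2, idx_twist))  # [w0, c0, w1, c1, w2, c2, w3, c3]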
Example #4
    def __init__(self, model_name, hidden_size, drop_prob=0.):
        super(LSTM_highway, self).__init__()
        self.albert = AlbertModel.from_pretrained(model_name)
        input_size = size_map[model_name]
        self.enc = layers.RNNEncoder(input_size=input_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)
        self.dec = layers.RNNEncoder(input_size=2 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)
        self.highway = layers.HighwayEncoder(2, 2 * hidden_size)
        self.qa_outputs = nn.Linear(2 * hidden_size, 2)
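size_map is not defined in this snippet; presumably it maps the ALBERT model name to its hidden size. A plausible version based on the published ALBERT configurations (an assumption, not the original code):

size_map = {
    'albert-base-v2': 768,
    'albert-large-v2': 1024,
    'albert-xlarge-v2': 2048,
    'albert-xxlarge-v2': 4096,
}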
Example #5
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF, self).__init__()

        self.hidden_size = hidden_size

        self.word_emb = layers.WordEmbedding(word_vectors, hidden_size)
        self.char_emb = layers.CharEmbedding(char_vectors, hidden_size)

        # assert hidden_size * 2 == (char_channel_size + word_dim)

        # highway network
        self.hwy = layers.HighwayEncoder(2, hidden_size * 2)

        # alternative manual highway network implementation (kept commented out)
        # for i in range(2):
        #     setattr(self, f'highway_linear{i}', nn.Sequential(
        #         nn.Linear(hidden_size * 2, hidden_size * 2), nn.ReLU()))

        #     setattr(self, f'highway_gate{i}', nn.Sequential(
        #         nn.Linear(hidden_size * 2, hidden_size * 2), nn.Sigmoid()))

        # self.emb = layers.Embedding(word_vectors=word_vectors,
        #                             hidden_size=hidden_size,
        #                             drop_prob=drop_prob)

        self.enc = layers.RNNEncoder(input_size=hidden_size * 2,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
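A minimal construction sketch for this model; the embedding matrices and sizes below are random placeholders, and the surrounding layers module is assumed to be importable:

import torch

word_vectors = torch.randn(50000, 300)  # (word_vocab_size, word_emb_dim), placeholder
char_vectors = torch.randn(1400, 64)    # (char_vocab_size, char_emb_dim), placeholder

model = BiDAF(word_vectors=word_vectors,
              char_vectors=char_vectors,
              hidden_size=100,
              drop_prob=0.2)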
Example #6
    def __init__(self,
                 word_vectors,
                 char_vocab_size,
                 char_dim,
                 hidden_size,
                 drop_prob=0.,
                 kernel_size=5,
                 padding=1):
        super(BiDAF_Char, self).__init__()

        self.char_emb = layers.Char_Embedding(char_vocab_size=char_vocab_size,
                                              char_dim=char_dim,
                                              drop_prob=drop_prob,
                                              hidden_size=hidden_size,
                                              kernel_size=kernel_size,
                                              padding=padding)

        self.word_emb = layers.Word_Embedding(word_vectors=word_vectors,
                                              hidden_size=hidden_size,
                                              drop_prob=drop_prob)

        self.hwy = layers.HighwayEncoder(num_layers=2, hidden_size=hidden_size)

        self.enc = layers.RNNEncoder(input_size=2 * hidden_size, # 08/09 note: input_size changed here, since after the highway layer the concatenation of char_emb + word_emb is (bs, seq_len, 2*h)
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example #7
    def __init__(self,
                 word_vectors,
                 char_vectors,
                 device,
                 hidden_size,
                 drop_prob=0.):
        super(RNet, self).__init__()
        self.device = device
        self.word_emb = nn.Embedding.from_pretrained(word_vectors)
        self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                             e_char=char_vectors.size(1),
                                             e_word=word_vectors.size(1),
                                             drop_prob=drop_prob,
                                             freeze=False)

        self.proj = nn.Linear(word_vectors.size(1) * 2,
                              hidden_size,
                              bias=False)

        self.hwy = layers.HighwayEncoder(2, hidden_size)

        self.encoder = Encoder(input_size=hidden_size,
                               h_size=hidden_size,
                               device=device,
                               drop_prob=drop_prob)

        self.gatedAttn = GatedAttn(input_size=hidden_size,
                                   h_size=hidden_size,
                                   device=device,
                                   drop_prob=drop_prob)

        self.selfAttn = SelfAttn(self.gatedAttn.out_size,
                                 device=device,
                                 drop_prob=drop_prob)

        self.pointer = Pointer(self.selfAttn.out_size,
                               self.encoder.out_size,
                               device=device)
Example #8
    def __init__(self,
                 word_vectors,
                 char_vec,
                 word_len,
                 emb_size,
                 enc_size=128,
                 drop_prob=0.1,
                 n_head=8,
                 LN_train=True,
                 DP_residual=False,
                 mask_pos=False,
                 two_pos=False,
                 rel=False,
                 total_prob=True,
                 final_prob=1.0,
                 freeze=True):
        super(QANet4, self).__init__()
        self.emb = layers.EmbeddingWithChar(word_vectors=word_vectors,
                                            hidden_size=emb_size,
                                            char_vec=char_vec,
                                            word_len=word_len,
                                            drop_prob=drop_prob,
                                            char_prop=0.4,
                                            hwy_drop=drop_prob,
                                            char_dim=200,
                                            bias=True,
                                            freeze=freeze,
                                            act='gelu')

        self.emb_resize = layers.Resizer(input_size=emb_size,
                                         output_size=enc_size,
                                         kernel_size=1,
                                         drop_prob=0,
                                         bias=True)

        self.pos_emb = layers.PosEmbeddings(hidden_size=enc_size,
                                            drop_prob=0,
                                            para_limit=1000,
                                            scale=False,
                                            from_pretrained=True,
                                            freeze=True)

        self.emb_enc = layers.EncoderBlock3(enc_size=enc_size,
                                            para_limit=1000,
                                            n_conv=4,
                                            kernel_size=7,
                                            drop_prob=drop_prob,
                                            n_head=n_head,
                                            att_drop_prob=drop_prob,
                                            final_prob=final_prob,
                                            LN_train=LN_train,
                                            DP_residual=DP_residual,
                                            mask_pos=mask_pos,
                                            two_pos=two_pos,
                                            rel=rel,
                                            act='gelu')

        self.att = layers.BiDAFAttention(hidden_size=enc_size,
                                         drop_prob=drop_prob)

        self.gelu = layers.HighwayEncoder(num_layers=1,
                                          hidden_size=4 * enc_size,
                                          drop_prob=drop_prob,
                                          act='gelu')

        self.att_resize = layers.Resizer(input_size=4 * enc_size,
                                         output_size=enc_size,
                                         kernel_size=1,
                                         drop_prob=0,
                                         bias=True)

        self.model_enc = layers.StackedEncoderBlocks(n_blocks=7,
                                                     hidden_size=enc_size,
                                                     para_limit=1000,
                                                     n_conv=2,
                                                     kernel_size=5,
                                                     drop_prob=drop_prob,
                                                     n_head=n_head,
                                                     att_drop_prob=drop_prob,
                                                     final_prob=final_prob,
                                                     LN_train=LN_train,
                                                     DP_residual=DP_residual,
                                                     mask_pos=mask_pos,
                                                     two_pos=two_pos,
                                                     rel=rel,
                                                     total_prob=total_prob,
                                                     act='gelu')

        self.out_beg = layers.OutputBlock(enc_size)
        self.out_end = layers.OutputBlock(enc_size)

        self.drop = nn.Dropout(drop_prob)
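The Resizer used above (kernel_size=1, mapping emb_size to enc_size) is presumably a pointwise projection; a hedged sketch under that assumption, not the original layers.Resizer:

import torch
import torch.nn as nn

class Resizer(nn.Module):
    # Hypothetical sketch: pointwise Conv1d projection from input_size to output_size channels.
    def __init__(self, input_size, output_size, kernel_size=1, drop_prob=0., bias=True):
        super(Resizer, self).__init__()
        self.conv = nn.Conv1d(input_size, output_size, kernel_size, bias=bias)
        self.drop = nn.Dropout(drop_prob)

    def forward(self, x):                                 # x: (batch, seq_len, input_size)
        x = self.conv(x.transpose(1, 2)).transpose(1, 2)  # Conv1d expects (batch, channels, seq_len)
        return self.drop(x)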
Example #9
    def __init__(self, model_name):
        super(AlbertLinear_highway, self).__init__()
        self.albert = AlbertModel.from_pretrained(model_name)
        input_dim = size_map[model_name]
        self.qa_outputs = nn.Linear(input_dim, 2)
        self.enc = layers.HighwayEncoder(3, input_dim)
Example #10
    def __init__(self,
                 word_vectors,
                 hidden_size,
                 use_char=False,
                 char_vectors=None,
                 use_syll=False,
                 syll_vectors=None,
                 drop_prob=0.):

        super(BiDAF, self).__init__()
        self.word_emb_size = word_vectors.size(1)

        self.emb = layers.WordEmbedding(word_vectors=word_vectors,
                                        hidden_size=hidden_size,
                                        drop_prob=drop_prob)

        self.use_char = use_char
        self.use_syll = use_syll

        if use_char and use_syll:
            self.char_emb = layers.CharEmbedding(char_vectors,
                                                 e_char=char_vectors.size(1),
                                                 e_word=hidden_size,
                                                 drop_prob=drop_prob,
                                                 freeze=False)
            self.syll_emb = layers.SyllEmbedding(syll_vectors,
                                                 e_syll=syll_vectors.size(1),
                                                 e_word=hidden_size,
                                                 drop_prob=drop_prob,
                                                 freeze=False)
            self.input_size = self.word_emb_size + 2 * hidden_size
        elif use_char:
            self.char_emb = layers.CharEmbedding(char_vectors,
                                                 e_char=char_vectors.size(1),
                                                 e_word=hidden_size,
                                                 drop_prob=drop_prob,
                                                 freeze=False)
            self.input_size = self.word_emb_size + hidden_size
        elif use_syll:
            self.syll_emb = layers.SyllEmbedding(syll_vectors,
                                                 e_syll=syll_vectors.size(1),
                                                 e_word=hidden_size,
                                                 drop_prob=drop_prob,
                                                 freeze=False)
            self.input_size = self.word_emb_size + hidden_size
        else:
            self.input_size = self.word_emb_size

        self.proj = nn.Linear(self.input_size, hidden_size, bias=False)
        self.hwy = layers.HighwayEncoder(2, hidden_size)

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
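A worked example of the input_size bookkeeping above, assuming 300-dimensional word vectors and hidden_size=100 (illustrative numbers only):

word_emb_size, hidden_size = 300, 100
print(word_emb_size + 2 * hidden_size)  # 500: use_char and use_syll
print(word_emb_size + hidden_size)      # 400: use_char or use_syll alone
print(word_emb_size)                    # 300: word embeddings only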