Example no. 1
0
    def __init__(self, cl_logits_input_dim=None):
        self.layers = {}
        self.initialize_vocab()

        self.layers['embedding'] = layers_lib.Embedding(
            self.vocab_size,
            FLAGS.embedding_dims,
            FLAGS.normalize_embeddings,
            self.vocab_freqs,
            FLAGS.keep_prob_emb,
            name='embedding')
        self.layers['embedding_1'] = layers_lib.Embedding(
            self.vocab_size,
            FLAGS.embedding_dims,
            FLAGS.normalize_embeddings,
            self.vocab_freqs,
            FLAGS.keep_prob_emb,
            name='embedding_1')

        self.layers['cnn'] = layers_lib.CNN(FLAGS.embedding_dims,
                                            FLAGS.keep_prob_emb)
        self.layers['lstm_1'] = layers_lib.BiLSTM(FLAGS.rnn_cell_size,
                                                  FLAGS.rnn_num_layers,
                                                  name="Bilstm")
        action_type = 5 if FLAGS.action == 'all' else 4
        self.layers['action_select'] = layers_lib.Actionselect(
            action_type, FLAGS.keep_prob_dense, name='action_output')
        self.layers['cl_logits'] = layers_lib.Project_layer(
            FLAGS.num_classes, FLAGS.keep_prob_dense, name='project_layer')
Example no. 2
0
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF, self).__init__()
        self.hidden_size = 2 * hidden_size  # doubled because we concatenate word and char vectors
        self.drop_prob = drop_prob  # stored here since the Encoder blocks below read self.drop_prob
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.enc = Encoder(dim=self.hidden_size,
                           depth=1,
                           heads=3,
                           ff_glu=True,
                           ff_dropout=self.drop_prob,
                           attn_dropout=self.drop_prob,
                           use_scalenorm=True,
                           position_infused_attn=True)

        self.att = layers.TBiDAFAttention(hidden_size=self.hidden_size,
                                          drop_prob=drop_prob)

        self.mod = Encoder(dim=2 * self.hidden_size,
                           depth=3,
                           heads=6,
                           ff_glu=True,
                           ff_dropout=self.drop_prob,
                           attn_dropout=self.drop_prob,
                           use_scalenorm=True,
                           position_infused_attn=True)

        self.out = layers.BiDAFOutput(hidden_size=self.hidden_size,
                                      drop_prob=self.drop_prob)
Example no. 3
0
    def __init__(self, model_name, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF2, self).__init__()
        self.hidden_size = hidden_size * 2  # the char embedding doubles the hidden size

        self.emb = layers.Embedding(model_name=model_name,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        # input_size = self.hidden_size + 2 because we add two extra features (avg_attention) to both the char
        # and word embeddings to boost performance. avg_attention uses an attention mechanism so that the model
        # itself learns a weighted average over the vectors.
        self.enc = layers.RNNEncoder(input_size=self.hidden_size + 2,
                                     hidden_size=self.hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.highway = layers.HighwayEncoder(2, 4 * hidden_size)

        self.mod = layers.RNNEncoder(input_size=2 * self.hidden_size,
                                     hidden_size=self.hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        #         self.sim = nn.CosineSimilarity(dim=1, eps=1e-6)

        self.qa_outputs = nn.Linear(2 * self.hidden_size, 2)
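The avg_attention feature described in the comments above is essentially attention pooling: score each position with a learned linear layer and take the softmax-weighted average of the vectors. A minimal sketch of that idea in plain PyTorch (the class name and its wiring into the embedding layer are assumptions, not the authors' code):

import torch
import torch.nn as nn

class AttentionPooling(nn.Module):
    """Learns a softmax-weighted average over a sequence of vectors."""

    def __init__(self, dim):
        super().__init__()
        self.score = nn.Linear(dim, 1)  # one scalar score per position

    def forward(self, x, mask=None):
        # x: (batch, seq_len, dim); mask: (batch, seq_len), 1 for real tokens
        scores = self.score(x).squeeze(-1)             # (batch, seq_len)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, float('-inf'))
        weights = torch.softmax(scores, dim=-1)        # attention weights
        return (weights.unsqueeze(-1) * x).sum(dim=1)  # (batch, dim)

# e.g. AttentionPooling(300)(torch.randn(2, 7, 300)) has shape (2, 300)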
Example no. 4
0
    def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
        super(BiDAF, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob,
                                    char_vectors=char_vectors)   # added last line

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)


        ### start our code:
        self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                                  hidden_size=hidden_size,
                                                  dropout=0.2)

        ### end our code
        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 5
0
    def __init__(self, word_vectors, hidden_size, drop_prob=0.):
        super(BiDAF, self).__init__()

        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)

        self.proj_bert_down = nn.Linear(in_features=768,
                                        out_features=hidden_size,
                                        bias=True)
        nn.init.xavier_uniform_(self.proj_bert_down.weight, gain=1)

        self.proj_glove_down = nn.Linear(in_features=300,
                                         out_features=hidden_size,
                                         bias=True)
        nn.init.xavier_uniform_(self.proj_glove_down.weight, gain=1)
Example no. 6
0
    def __init__(self,
                 word_vectors,
                 word_vectors_char,
                 hidden_size,
                 drop_prob=0.):
        super(BiDAF, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    word_vectors_char=word_vectors_char,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 7
0
    def __init__(self, word_vectors, hidden_size, use_pos, use_ner, drop_prob=0.):
        super(BiDAF, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.use_pos = use_pos
        self.use_ner = use_ner
        rnn_input_size = hidden_size
        if use_pos:
            rnn_input_size += 1
        if use_ner:
            rnn_input_size += 1
        self.enc = layers.RNNEncoder(input_size=rnn_input_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 8
0
 def __init__(self, word_vectors, hidden_size, drop_prob=0.):
     super(Paraphraser, self).__init__()
     # We load embeddings from a glove vector file.
     # embedding, drop, projection (linear), highway layer - todo: do we want all of these or just embedding?
     self.emb = layers.Embedding(word_vectors=word_vectors,
                                 hidden_size=hidden_size,
                                 drop_prob=drop_prob)
Example no. 9
0
    def __init__(self,
                 word_vectors,
                 char_vectors,
                 hidden_size,
                 num_heads=8,
                 drop_prob=0.):
        super(BiDAF, self).__init__()

        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        hidden_size *= 2  # update hidden size for other layers due to char embeddings

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 10
0
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.1):
        super(BiDAF, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        # self.enc = layers.RNNEncoder(input_size=hidden_size,
        #                              hidden_size=hidden_size,
        #                              num_layers=1,
        #                              drop_prob=drop_prob)

        self.emb_encoder = layers.EmbeddingEncoder(d_model=hidden_size, drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=4 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        # self.model_encoder = layers.ModelEncoder(d_model=hidden_size, drop_prob=drop_prob)
        #
        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 11
0
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF_charCNN, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)
        
        self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)
        
        self.hwy = layers.HighwayEncoder(2, 2*hidden_size)

        self.enc = layers.RNNEncoder(input_size=2*hidden_size,
                                     hidden_size=2*hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * 2*hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * 2*hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
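The HighwayEncoder(2, 2*hidden_size) applied to the concatenated word/char embedding is a stack of highway layers: each layer mixes a nonlinear transform of the input with the input itself through a learned gate. A hedged sketch of such a stack (not the project's exact implementation):

import torch
import torch.nn as nn

class Highway(nn.Module):
    def __init__(self, num_layers, dim):
        super().__init__()
        self.transforms = nn.ModuleList(nn.Linear(dim, dim) for _ in range(num_layers))
        self.gates = nn.ModuleList(nn.Linear(dim, dim) for _ in range(num_layers))

    def forward(self, x):
        for transform, gate in zip(self.transforms, self.gates):
            g = torch.sigmoid(gate(x))      # gate in (0, 1)
            t = torch.relu(transform(x))    # candidate transformation
            x = g * t + (1 - g) * x         # gated mix of transformed and original input
        return x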
Example no. 12
0
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF, self).__init__()

        # print("vectors: ", word_vectors)
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.self_att = layers.SelfAttention(hidden_size=8 * hidden_size,
                                             drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)

        self.batch_size = 64
        self.hidden_size = hidden_size
Example no. 13
0
    def __init__(self, cl_logits_input_dim=None):
        self.global_step = tf.train.get_or_create_global_step()
        self.vocab_freqs = _get_vocab_freqs()

        # Cache VatxtInput objects
        self.cl_inputs = None
        self.lm_inputs = None

        # Cache intermediate Tensors that are reused
        self.tensors = {}

        # Construct layers which are reused in constructing the LM and
        # Classification graphs. Instantiating them all once here ensures that
        # variable reuse works correctly.
        self.layers = {}
        self.layers['embedding'] = layers_lib.Embedding(
            FLAGS.vocab_size, FLAGS.embedding_dims, FLAGS.normalize_embeddings,
            self.vocab_freqs, FLAGS.keep_prob_emb)
        self.layers['lstm'] = layers_lib.LSTM(FLAGS.rnn_cell_size,
                                              FLAGS.rnn_num_layers,
                                              FLAGS.keep_prob_lstm_out)
        self.layers['lm_loss'] = layers_lib.SoftmaxLoss(
            FLAGS.vocab_size,
            FLAGS.num_candidate_samples,
            self.vocab_freqs,
            name='LM_loss')

        cl_logits_input_dim = cl_logits_input_dim or FLAGS.rnn_cell_size
        self.layers['cl_logits'] = layers_lib.cl_logits_subgraph(
            [FLAGS.cl_hidden_size] * FLAGS.cl_num_layers, cl_logits_input_dim,
            FLAGS.num_classes, FLAGS.keep_prob_cl_hidden)
Example no. 14
0
    def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
        super(BiDAF, self).__init__()
        self.hidden_size = hidden_size

        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob,
                                    char_vectors=char_vectors)   # added character vectors

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)


        ### start our code:
        self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                                  hidden_size=hidden_size,
                                                  dropout=0.2)

        ### end our code
        self.linear = nn.Linear(in_features=8 * self.hidden_size, out_features=2 * self.hidden_size, bias=True)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=4,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 15
0
    def __init__(self, word_vectors, char_vectors, hidden_size, kernel_size, filters, drop_prob=0.):
        super(QANet, self).__init__()

        # Input embedding layer
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob,
                                    char_vectors=char_vectors)   # added character vectors

        # resize input embedding layer output size to fit embedding encoder layer input size
        self.resize_emb_pe = nn.Linear(in_features=hidden_size, out_features=filters, bias=False)

        # Embedding encoder layer
        self.emb_enc = qa.EncoderBlock(input_size=filters, kernel_size=kernel_size,
                                       filters=filters, num_conv_layers=4, drop_prob=drop_prob)

        # Context-Query attention layer
        self.att = layers.BiDAFAttention(hidden_size=filters,
                                         drop_prob=drop_prob)

        # Model encoder layer: a single EncoderBlock instance listed 7 times, so all 7 blocks
        # share weights; use copy.deepcopy(mod_enc) here if independently parameterized blocks are wanted.
        mod_enc = qa.EncoderBlock(input_size=4*filters, kernel_size=5,
                                  filters=filters, num_conv_layers=2, drop_prob=drop_prob)
        self.mod_enc = nn.ModuleList([mod_enc] * 7)   # 7 encoder blocks

        # QANet Output layer
        self.output = qa.QANetOutput(input_size=4*filters, drop_prob=drop_prob)
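Note that nn.ModuleList([mod_enc] * 7) registers the same EncoderBlock object seven times, so the seven "blocks" share one set of weights; independently parameterized blocks need separate instances. A small self-contained illustration of the difference (nn.Linear stands in for the encoder block):

import copy
import torch.nn as nn

block = nn.Linear(8, 8)  # stand-in for an encoder block

shared = nn.ModuleList([block] * 7)                                   # one weight set, reused 7 times
independent = nn.ModuleList(copy.deepcopy(block) for _ in range(7))   # 7 separate weight sets

print(sum(p.numel() for p in shared.parameters()))       # 72  (8*8 weights + 8 biases)
print(sum(p.numel() for p in independent.parameters()))  # 504 (7 * 72)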
Example no. 16
0
    def __init__(self,
                 word_vectors,
                 hidden_size,
                 output_size,
                 device,
                 drop_prob=0.,
                 num_layers=1):
        super(Seq2Seq, self).__init__()

        self.hidden_size = hidden_size
        self.device = device
        self.word_vectors = word_vectors
        self.model_type = 'seq2seq'

        #self.emb = nn.Embedding(num_embeddings=output_size, embedding_dim=hidden_size)
        #self.dropout = nn.Dropout(p=drop_prob)
        self.emb = layers.Embedding(word_vectors,
                                    hidden_size,
                                    drop_prob=drop_prob)

        self.encoder = layers.EncoderRNN(input_size=hidden_size,
                                         hidden_size=hidden_size,
                                         num_layers=num_layers,
                                         drop_prob=drop_prob)

        self.decoder = layers.DecoderRNN(input_size=hidden_size,
                                         hidden_size=hidden_size,
                                         num_layers=num_layers,
                                         drop_prob=drop_prob)

        self.generator = layers.Generator(hidden_size, output_size)
Example no. 17
0
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
        super(BiDAF, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        # self.enc = layers.RNNEncoder(input_size=hidden_size,
        #                              hidden_size=hidden_size,
        #                              num_layers=1,
        #                              drop_prob=drop_prob)

        # self.transformer = make_model(word_vectors, drop_prob, hidden_size)

        self.emb_enc = EncoderBlock(conv_num=4, ch_num=64, k=7)

        self.att = layers.BiDAFAttention(hidden_size=hidden_size,
                                         drop_prob=drop_prob)

        # TODO
        self.mod = layers.RNNEncoder(input_size=4 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 18
0
    def __init__(self,
                 word_vectors,
                 char_vectors,
                 hidden_size,
                 num_heads,
                 char_embed_drop_prob,
                 drop_prob=0.1):
        super(SketchyReader, self).__init__()
        '''class QANet(nn.Module):

        def __init__(self, word_vectors, char_vectors, hidden_size, device, drop_prob=0.):
        super(QANet, self).__init__()

        self.device = device'''

        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    char_embed_drop_prob=char_embed_drop_prob,
                                    word_embed_drop_prob=drop_prob)

        hidden_size *= 2  # update hidden size for other layers due to char embeddings

        self.c_resizer = layers.Initialized_Conv1d(hidden_size, 128)

        self.q_resizer = layers.Initialized_Conv1d(hidden_size, 128)

        self.model_resizer = layers.Initialized_Conv1d(512, 128)

        self.enc = layers.StackedEncoder(
            num_conv_blocks=4,
            kernel_size=7,
            num_heads=num_heads,
            dropout=drop_prob)  # embedding encoder layer
        self.att = layers.BiDAFAttention(
            hidden_size=128,
            drop_prob=drop_prob)  # context-query attention layer

        # self.mod1 = layers.StackedEncoder(num_conv_blocks=2,
        #                                  kernel_size=7,
        #                                  dropout=drop_prob)     # model layer

        # self.mod2 = layers.StackedEncoder(num_conv_blocks=2,
        #                                  kernel_size=7,
        #                                  dropout=drop_prob)     # model layer

        # self.mod3 = layers.StackedEncoder(num_conv_blocks=2,
        #                                  kernel_size=7,
        #                                  dropout=drop_prob)     # model layer
        self.model_encoder_layers = nn.ModuleList([
            layers.StackedEncoder(num_conv_blocks=2,
                                  kernel_size=7,
                                  dropout=drop_prob) for _ in range(7)
        ])

        self.out = layers.SketchyOutput(hidden_size=128)  # output layer
Example no. 19
0
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.,twist_embeddings=False):
        super(BiDAF_charCNN_BERTEnc_BERTMod, self).__init__()
        
        ###
        self.twist_embeddings = twist_embeddings
        idx_list = []
        for i in range(hidden_size):
            idx_list.append(i)
            idx_list.append(hidden_size+i)
        self.register_buffer('idx_twist',torch.tensor(idx_list))
        ###
        
        
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)
        
        self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)
        
        self.hwy = layers.HighwayEncoder(2, 2*hidden_size)

        self.enc = bert_layers.BertEncoder(n_layers=3, #n_layers=4,
                                           d_feature=2*hidden_size, 
                                           n_heads=8,
                                           out_size=2*hidden_size,
                                           #d_ff=2048,
                                           d_ff = 2*hidden_size, 
                                           dropout_prob=0.1,
                                           #dropout_prob=drop_prob,
                                           ff_activation=F.relu)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)
        
        self.mod = bert_layers.BertEncoder(n_layers=3, #n_layers=3,
                                           d_feature=8*hidden_size, 
                                           n_heads=8,
                                           out_size=2*hidden_size,
                                           #d_ff=2048,
                                           d_ff = 2*hidden_size, 
                                           dropout_prob=0.1,
                                           #dropout_prob=drop_prob,
                                           ff_activation=F.relu)

        # self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
        #                              hidden_size=hidden_size,
        #                              num_layers=2,
        #                              drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 20
0
    def __init__(self, vectors, hidden_size, char_limit, use_transformer, use_GRU, drop_prob=.1, **kwargs):
        super(BiDAF, self).__init__()
        self.use_transformer = use_transformer
        self.use_GRU = use_GRU
        self.hidden_size = hidden_size

        self.emb = layers.Embedding(vectors=vectors,
                                    c2w_size=kwargs['c2w_size'],
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob,
                                    char_limit=char_limit)
        if not use_transformer:
            self.enc = layers.RNNEncoder(input_size=hidden_size,
                                         hidden_size=hidden_size,  # output = 2*hidden_size
                                         num_layers=1,
                                         drop_prob=drop_prob,
                                         use_GRU=use_GRU)
            self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                         hidden_size=hidden_size,  # output = 2*hidden_size
                                         num_layers=2,
                                         drop_prob=drop_prob,
                                         use_GRU=use_GRU)
            self.out = layers.BiDAFOutput(hidden_size=2 * hidden_size, drop_prob=drop_prob,
                                          use_transformer=use_transformer)
        else:
            self.heads = kwargs['heads']
            self.inter_size = kwargs['inter_size']
            self.enc = layers.TransformerEncoderStack(
                N=kwargs['enc_blocks'],
                heads=self.heads,
                input_size=hidden_size,
                output_size=hidden_size,
                inter_size=self.inter_size,
                num_conv=kwargs['enc_convs'],
                drop_prob=drop_prob,
                p_sdd=kwargs['p_sdd']
                )
            self.squeeze = layers.InitializedLayer(4*hidden_size, hidden_size, bias=False)
            self.mod = layers.TransformerEncoderStack(
                N=kwargs['mod_blocks'],
                heads=self.heads,
                input_size=hidden_size,
                output_size=hidden_size,
                inter_size=self.inter_size,
                num_conv=kwargs['mod_convs'],
                drop_prob=drop_prob,
                p_sdd=kwargs['p_sdd']
                )
            self.out = layers.QAOutput(2*hidden_size)

        self.att = layers.BiDAFAttention(hidden_size=(1 if self.use_transformer else 2)*hidden_size,
                                         drop_prob=drop_prob)  # (batch_size, seq_len, 4*input_hidden_size)
Example no. 21
0
    def __init__(self,
                 word_vectors,
                 char_vectors,
                 context_max_len,
                 query_max_len,
                 d_model,
                 train_cemb=False,
                 pad=0,
                 dropout=0.1,
                 num_head=8):
        """
        """
        super(QANet, self).__init__()
        if train_cemb:
            self.char_emb = nn.Embedding.from_pretrained(char_vectors,
                                                         freeze=False)
            print("Training char_embeddings")
        else:
            self.char_emb = nn.Embedding.from_pretrained(char_vectors)

        self.word_emb = nn.Embedding.from_pretrained(word_vectors)
        self.LC = context_max_len
        self.LQ = query_max_len
        self.num_head = num_head
        self.pad = pad
        self.dropout = dropout

        wemb_dim = word_vectors.size()[1]
        cemb_dim = char_vectors.size()[1]
        #print("Word vector dim-%d, Char vector dim-%d" % (wemb_dim, cemb_dim))

        #Layer Declarations
        self.emb = layers.Embedding(wemb_dim, cemb_dim, d_model)
        self.emb_enc = layers.Encoder(num_conv=4,
                                      d_model=d_model,
                                      num_head=num_head,
                                      k=7,
                                      dropout=0.1)
        self.cq_att = layers.CQAttention(d_model=d_model)
        self.cq_resizer = layers.Initialized_Conv1d(
            d_model * 4, d_model
        )  # Forward layer to reduce the dimension of the cq_att output back to d_model
        self.model_enc_blks = nn.ModuleList([
            layers.Encoder(num_conv=2,
                           d_model=d_model,
                           num_head=num_head,
                           k=5,
                           dropout=0.1) for _ in range(7)
        ])
        self.out = layers.QAOutput(d_model)
Example no. 22
0
    def __init__(self, embeddings: InputEmbeddings, hidden_size, drop_prob=0.):
        super(SLQA, self).__init__()
        word_vectors = embeddings.word_vectors
        char_vectors = embeddings.char_vectors

        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    char_vectors=char_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = AlignedAttention(hidden_size=2 * hidden_size,
                                    drop_prob=drop_prob)

        self.p_fusion1 = FusionLayer(2 * hidden_size)

        self.q_fusion1 = FusionLayer(2 * hidden_size)

        self.p_enc_eq_13 = layers.RNNEncoder(input_size=2 * hidden_size,
                                             hidden_size=hidden_size,
                                             num_layers=1,
                                             drop_prob=drop_prob)

        self.q_enc_eq_13 = layers.RNNEncoder(input_size=2 * hidden_size,
                                             hidden_size=hidden_size,
                                             num_layers=1,
                                             drop_prob=drop_prob)

        self.self_attention = FusedSelfAttention(2 * hidden_size)

        self.p_enc_eq_17 = layers.RNNEncoder(input_size=2 * hidden_size,
                                             hidden_size=hidden_size,
                                             num_layers=1,
                                             drop_prob=drop_prob)
        self.q_enc_eq_17 = layers.RNNEncoder(input_size=2 * hidden_size,
                                             hidden_size=hidden_size,
                                             num_layers=1,
                                             drop_prob=drop_prob)

        self.q_linear_align_18 = LinearAlign(2 * hidden_size)

        self.bilinear_start = BilinearSeqAtt(2 * hidden_size, 2 * hidden_size)
        self.bilinear_end = BilinearSeqAtt(2 * hidden_size, 2 * hidden_size)
Example no. 23
0
    def __init__(self, weights_matrix, hidden_size, drop_prob=0.):
        super(BiDAF, self).__init__()
        self.emb = layers.Embedding(weights_matrix=weights_matrix,
                                    hidden_size=hidden_size)

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size)
Example no. 24
0
 def __init__(self, word_vectors, hidden_size, drop_prob=0.):
     super(BiDAF, self).__init__()
     self.emb = layers.Embedding(word_vectors=word_vectors,
                                 hidden_size=hidden_size,
                                 drop_prob=drop_prob)
     self.enc = layers.RNNEncoder(input_size=hidden_size,
                                  hidden_size=hidden_size,
                                  num_layers=1,
                                  drop_prob=drop_prob)
     self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                      drop_prob=drop_prob)
     self.mod = layers.TPRRNN(word_emb_size=(8 * hidden_size),
                              n_symbols=100,
                              d_symbols=10,
                              n_roles=20,
                              d_roles=10,
                              hidden_size=hidden_size)
     self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                   drop_prob=drop_prob)
Example no. 25
0
    def __init__(self, word_vectors, hidden_size, drop_prob=0.):
        super(BERT, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=300,
                                    drop_prob=drop_prob)

        self.bert_start = nn.Linear(in_features=300, out_features=1, bias=True)
        nn.init.xavier_uniform_(self.bert_start.weight, gain=1)

        self.bert_end = nn.Linear(in_features=300, out_features=1, bias=True)
        nn.init.xavier_uniform_(self.bert_end.weight, gain=1)

        self.proj_up = nn.Linear(in_features=300,
                                 out_features=hidden_size,
                                 bias=True)

        self.proj_down = nn.Linear(in_features=hidden_size,
                                   out_features=300,
                                   bias=True)
        nn.init.xavier_uniform_(self.proj_down.weight, gain=1)
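The bert_start and bert_end heads map each 300-dimensional token representation to a single logit, and the start/end distributions over the passage come from a softmax over those per-token logits. A hedged sketch of that span-prediction step (tensor shapes assumed, not taken from the authors' forward pass):

import torch
import torch.nn as nn

hidden = torch.randn(4, 50, 300)   # (batch, seq_len, feature_dim) token representations
start_head = nn.Linear(300, 1)
end_head = nn.Linear(300, 1)

start_logits = start_head(hidden).squeeze(-1)        # (batch, seq_len)
end_logits = end_head(hidden).squeeze(-1)            # (batch, seq_len)

start_probs = torch.softmax(start_logits, dim=-1)    # distribution over start positions
end_probs = torch.softmax(end_logits, dim=-1)        # distribution over end positions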
Example no. 26
0
    def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0., enable_EM=True, enable_posner=True, enable_selfatt=True):
        super(BiDAF, self).__init__()
        self.embd_size = hidden_size
        self.d = self.embd_size * 2 # word_embedding + char_embedding
        self.enable_EM = enable_EM
        if enable_EM:
            self.d += 2                  # exact-match word features
        if enable_posner:
            self.d += 10                 # POS/NER word features
        self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                    hidden_size=self.embd_size,
                                    drop_prob=drop_prob, enable_posner=enable_posner)

        self.enc = layers.RNNEncoder(input_size=self.d,
                                     hidden_size=self.d,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * self.d,
                                         drop_prob=drop_prob)

        self.enable_selfatt = enable_selfatt
        if enable_selfatt:
            # self.selfMatch = layers.SelfMatcher(in_size = 8 * self.d,
            #                                  drop_prob=drop_prob)
            self.selfMatch = layers.StaticDotAttention(memory_size = 2 * self.d, 
                            input_size = 2 * self.d, attention_size = 2 * self.d,
                            drop_prob=drop_prob)

            self.mod = layers.RNNEncoder(input_size=4 * self.d,
                                         hidden_size=self.d,
                                         num_layers=2,
                                         drop_prob=drop_prob)
        else:
            self.mod = layers.RNNEncoder(input_size=2 * self.d,
                                         hidden_size=self.d,
                                         num_layers=2,
                                         drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=self.d,
                                      drop_prob=drop_prob)
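The self.d bookkeeping above mirrors how the extra per-token features are appended to the embedding: exact-match flags add 2 dimensions and POS/NER indicators add 10, so the encoder input width grows accordingly. A minimal sketch of that concatenation (the feature tensors are placeholders; widths follow the flags above):

import torch

batch, seq_len, embd = 2, 30, 200
word_char_emb = torch.randn(batch, seq_len, 2 * embd)            # word + char embedding
em_feats = torch.randint(0, 2, (batch, seq_len, 2)).float()      # exact-match flags
posner_feats = torch.randn(batch, seq_len, 10)                   # POS/NER features

enc_input = torch.cat([word_char_emb, em_feats, posner_feats], dim=-1)
print(enc_input.shape)   # torch.Size([2, 30, 412]) == 2*embd + 2 + 10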
Example no. 27
0
    def __init__(self, word_vectors, hidden_size, drop_prob=0.):
        super(BiDAF_attDCA, self).__init__()
        self.emb = layers.Embedding(word_vectors=word_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = DoubleCrossAttention(hidden_size=2 * hidden_size,
                                        drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=6 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = BiDAFOutput_att(hidden_size=hidden_size,
                                   att_put_h_size=6 * hidden_size,
                                   drop_prob=drop_prob)
Example no. 28
0
    def __init__(self,
                 word_vectors,
                 hidden_size,
                 output_size,
                 device,
                 drop_prob=0.,
                 num_layers=1):
        super(Seq2SeqAttn, self).__init__()

        self.hidden_size = hidden_size
        self.word_vectors = word_vectors
        self.device = device
        self.enc_hiddens = None
        self.enc_masks = None
        self.model_type = 'seq2seq_attn'

        #self.emb = nn.Embedding(num_embeddings=output_size, embedding_dim=hidden_size)
        self.emb = layers.Embedding(word_vectors, hidden_size)

        self.encoder = layers.EncoderRNN(input_size=hidden_size,
                                         hidden_size=hidden_size,
                                         num_layers=num_layers,
                                         drop_prob=drop_prob)

        self.decoder = layers.DecoderRNN(input_size=2 * hidden_size,
                                         hidden_size=hidden_size,
                                         num_layers=num_layers,
                                         drop_prob=drop_prob)

        self.att_projection = nn.Linear(in_features=2 * hidden_size,
                                        out_features=hidden_size,
                                        bias=False)
        self.combined_output_projection = nn.Linear(in_features=3 *
                                                    hidden_size,
                                                    out_features=hidden_size,
                                                    bias=False)
        self.generator = layers.Generator(hidden_size, output_size)
        self.dropout = nn.Dropout(p=drop_prob)
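The projection shapes above follow the usual multiplicative-attention recipe: att_projection maps the 2h-dimensional encoder states down to h so they can be scored against the h-dimensional decoder state, and combined_output_projection maps the concatenated [context; decoder state] (3h) back to h. A hedged sketch of one decoding step (variable names are illustrative, not the authors'):

import torch

h = 128
enc_hiddens = torch.randn(4, 20, 2 * h)            # (batch, src_len, 2h) encoder outputs
dec_state = torch.randn(4, h)                      # (batch, h) current decoder hidden state
att_proj = torch.nn.Linear(2 * h, h, bias=False)
out_proj = torch.nn.Linear(3 * h, h, bias=False)

scores = torch.bmm(att_proj(enc_hiddens), dec_state.unsqueeze(-1)).squeeze(-1)  # (batch, src_len)
alpha = torch.softmax(scores, dim=-1)                                           # attention weights
context = torch.bmm(alpha.unsqueeze(1), enc_hiddens).squeeze(1)                 # (batch, 2h)
combined = torch.tanh(out_proj(torch.cat([context, dec_state], dim=-1)))        # (batch, h)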
Example no. 29
0
    def __init__(self,
                 word_mat,
                 w_embedding_size,
                 c_embeding_size,
                 c_vocab_size,
                 hidden_size,
                 num_head=1,
                 drop_prob=0.2):
        super(BiDAF, self).__init__()
        self.emb = layers.Embedding(word_mat, w_embedding_size,
                                    c_embeding_size, c_vocab_size, hidden_size,
                                    drop_prob)
        self.enc = layers.RNNEncoder(input_size=w_embedding_size + hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)
        self.var_dropout = layers.VariationalDropout(drop_prob,
                                                     batch_first=True)
        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)
        self.linear_trans = nn.Sequential(
            nn.Linear(8 * hidden_size, 2 * hidden_size), nn.ReLU())
        self.attn_mod = layers.RNNEncoder(hidden_size * 2,
                                          hidden_size,
                                          num_layers=1,
                                          drop_prob=drop_prob)

        self.self_attn = layers.BiDAFSelfAttention(num_head, 2 * hidden_size)
        self.linear_attn = nn.Sequential(
            nn.Linear(2 * hidden_size, 2 * hidden_size), nn.ReLU())

        self.mod = layers.RNNEncoder(input_size=2 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                      drop_prob=drop_prob)
Example no. 30
0
    def __init__(self, word_vectors, ch_vectors, hidden_size, drop_prob=0.):
        super(BiDAF, self).__init__()
        torch.cuda.empty_cache()
        self.emb = layers.Embedding(word_vectors=word_vectors, ch_vectors=ch_vectors,
                                    hidden_size=hidden_size,
                                    drop_prob=drop_prob)

        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob)

        self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                         drop_prob=drop_prob)

        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob)

        self.chunk = layers.ChunkLayer(hidden_size=hidden_size, max_ans_len=10)

        self.out = layers.BiDAFOutput(hidden_size=hidden_size, max_ans_len=10)