Example #1
    def forward(self, x):
        x = self.embed(x)

        if self.cove:
            # Build a CoVe encoder that keeps the GloVe embeddings and both
            # LSTM layers (layer0=True, residual_embeddings=True), then run it
            # over the batch; every row is treated as full length.
            outputs_both_layer_cove_with_glove = MTLSTM(
                n_vocab=None,
                vectors=None,
                layer0=True,
                residual_embeddings=True)
            outputs_both_layer_cove_with_glove.cuda()
            x = outputs_both_layer_cove_with_glove(x,
                                                   [x.shape[1]] * x.shape[0])

        # CNN-over-words classifier: add a channel dimension, convolve,
        # max-pool over time, then classify the concatenated feature maps.
        x = x.unsqueeze(1)
        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]
        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]
        x = torch.cat(x, 1)
        x = self.dropout(x)
        output = self.fully_connected(x)
        return output
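Constructing the MTLSTM (and moving it to the GPU) inside forward re-initializes the CoVe encoder on every call. A minimal sketch of the more usual pattern is shown below, with the encoder built once in the constructor; CoveClassifier, embed and use_cove are illustrative names, not part of the original snippet.

import torch.nn as nn
from cove import MTLSTM

# Sketch only: create the CoVe encoder once and reuse it on every forward pass.
class CoveClassifier(nn.Module):
    def __init__(self, embed, use_cove=True):
        super().__init__()
        self.embed = embed          # e.g. a pretrained nn.Embedding
        self.use_cove = use_cove
        if use_cove:
            # Same configuration as above: operate on pre-embedded inputs
            # (n_vocab=None, vectors=None) and return GloVe + both layers.
            self.cove = MTLSTM(n_vocab=None,
                               vectors=None,
                               layer0=True,
                               residual_embeddings=True)

    def forward(self, x):
        x = self.embed(x)
        if self.use_cove:
            x = self.cove(x, [x.shape[1]] * x.shape[0])
        return x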
Example #2
import numpy as np
import torch
from torch.autograd import Variable

from cove import MTLSTM


def compute_torch_values(inputs, embeddings):
    # Build the CoVe encoder on top of the given word embeddings.
    model = MTLSTM(n_vocab=embeddings.shape[0],
                   vectors=torch.from_numpy(embeddings.astype(np.float32)))
    model.cuda(0)
    model_inputs = Variable(torch.from_numpy(inputs.astype(np.int64)))
    # Every sequence in the batch is treated as having full length.
    lengths = torch.from_numpy(
        np.ones((inputs.shape[0], ), dtype=np.int64) * inputs.shape[1])
    cove_outputs = model(model_inputs.cuda(), lengths=lengths.cuda())
    torch_output = cove_outputs.data.cpu().numpy()
    print("Torch output shape", torch_output.shape)
    return torch_output
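A toy invocation of compute_torch_values, assuming a CUDA device and 300-dimensional vectors (the size CoVe's LSTM expects); the random data below is purely illustrative.

import numpy as np

# Hypothetical toy data: 1000-word vocabulary, 300-d vectors, and a batch
# of 8 already-numericalized sentences of length 20.
embeddings = np.random.randn(1000, 300).astype(np.float32)
inputs = np.random.randint(0, 1000, size=(8, 20))

cove_vectors = compute_torch_values(inputs, embeddings)
# CoVe's default output is 600-dimensional, so this should print (8, 20, 600).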
Example #3
class tmcove(Model):
    def load(self, vectors):
        # Keep the raw vectors around for predict() and wrap them in CoVe.
        self.vectors = vectors
        self.model = MTLSTM(n_vocab=len(vectors.keys()), vectors=vectors)
        self.model.cuda()

    def train(self, X, Y):
        pass

    def predict(self, X):
        X, _ = self.input_function(X, [])
        return [[get_word2vec(token, self.vectors) for token in tokens]
                for tokens in X]
Example #4
def save_cove_weights(options):
    """Saves the weights of the CoVe LSTM for manual TensorFlow initialization.
    """
    folder_name = os.path.join(options.data_dir, constants.COVE_WEIGHTS_FOLDER)
    if all(os.path.exists(os.path.join(folder_name, name + ".npy"))
           for name in constants.COVE_WEIGHT_NAMES):
        print("Cove weights already saved")
        return
    os.makedirs(folder_name, exist_ok=True)
    vocab = get_vocab(options.data_dir)
    embeddings = embedding_util.load_word_embeddings_including_unk_and_padding(
        options)
    vec_size = 2 * embeddings.shape[1]  # CoVe output size (not used below)
    print("Loading CoVe model")
    model = MTLSTM(n_vocab=embeddings.shape[0],
                   vectors=torch.from_numpy(embeddings.astype(np.float32)))
    print("Saving CoVe weights")
    for weight_name in constants.COVE_WEIGHT_NAMES:
        tensor = getattr(model.rnn, weight_name)
        np_value = tensor.cpu().data.numpy()
        full_file_name = os.path.join(folder_name, weight_name + ".npy")
        np.save(full_file_name, np_value)
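For completeness, the arrays written above can be read back with NumPy before being fed to TensorFlow. The helper below, load_cove_weights, is a hypothetical sketch and not part of the original code; it assumes the same folder and weight names used by save_cove_weights.

import os
import numpy as np

def load_cove_weights(folder_name, weight_names):
    """Read the .npy files written by save_cove_weights into a dict."""
    return {name: np.load(os.path.join(folder_name, name + ".npy"))
            for name in weight_names}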
Example #5
from torchtext import data
from torchtext import datasets
from torchtext.vocab import GloVe

from cove import MTLSTM

TEXT = data.Field(lower=True, include_lengths=True, batch_first=True)
LABEL = data.Field(lower=True, include_lengths=True, batch_first=True)

train_path = "C:\\Users\\bhara\\Downloads\\NNNlpHW3\\suggestionMining\\data\\Subtask-A\\V1.4_Training.csv"
train = data.TabularDataset(path=train_path,
                            format='csv',
                            fields=[('id', None), ('sentence', TEXT),
                                    ('label', LABEL)])

TEXT.build_vocab(train,
                 vectors=GloVe(name='840B', dim=300, cache='.embeddings'))
LABEL.build_vocab(train)
outputs_cove_with_glove = MTLSTM(n_vocab=len(TEXT.vocab),
                                 vectors=TEXT.vocab.vectors,
                                 residual_embeddings=True,
                                 model_cache='.embeddings')
#glove_then_first_then_last_layer_cove = outputs_both_layer_cove_with_glove(<pass a sentence Glove embedding>)

train_iter = data.Iterator(train, batch_size=100)

z = None
for batch_idx, batch in enumerate(train_iter):
    z = batch
    glove_then_last_layer_cove = outputs_cove_with_glove(*batch.sentence)
    print(glove_then_last_layer_cove.size())
Example #6
     NUM),  # we won't be needing the id, so we pass in None as the field
    ('moment', TEXT)
]  # process it as text
tst = data.TabularDataset(
    path=path + "test_data.csv",  # the file path
    format='csv',
    # if your csv file has a header row, pass skip_header=True so it
    # doesn't get processed as data!
    skip_header=True,
    fields=tst_datafields)

# build the vocabulary using train and validation dataset and assign the vectors
TEXT.build_vocab(trainds, valds, max_size=100000, vectors=vec)
# build vocab for labels
LABEL.build_vocab(trainds)

outputs_last_layer_cove = MTLSTM(n_vocab=len(TEXT.vocab),
                                 vectors=TEXT.vocab.vectors)
outputs_both_layer_cove = MTLSTM(n_vocab=len(TEXT.vocab),
                                 vectors=TEXT.vocab.vectors,
                                 layer0=True)
outputs_both_layer_cove_with_glove = MTLSTM(n_vocab=len(TEXT.vocab),
                                            vectors=TEXT.vocab.vectors,
                                            layer0=True,
                                            residual_embeddings=True)

traindl, valdl = data.BucketIterator.splits(
    datasets=(trainds, valds),  # the train and validation TabularDatasets
    batch_sizes=(64, len(valid)),  # batch sizes for train and validation
    sort_key=lambda x: len(x.moment),  # attribute to sort the examples by
    device=None,  # -1 means CPU; 0 or None means GPU
    sort_within_batch=True,
Example #7
    def __init__(self, field, args):
        super().__init__()
        self.field = field
        self.args = args
        self.pad_idx = self.field.vocab.stoi[self.field.pad_token]

        self.encoder_embeddings = Embedding(field,
                                            args.dimension,
                                            dropout=args.dropout_ratio,
                                            project=not args.cove)
        self.decoder_embeddings = Embedding(field,
                                            args.dimension,
                                            dropout=args.dropout_ratio,
                                            project=True)

        if self.args.cove or self.args.intermediate_cove:
            self.cove = MTLSTM(model_cache=args.embeddings,
                               layer0=args.intermediate_cove,
                               layer1=args.cove)
            # 600 per CoVe layer used; the last 400 is for GloVe and char n-gram embeddings
            cove_dim = int(args.intermediate_cove) * 600 + int(args.cove) * 600 + 400
            self.project_cove = Feedforward(cove_dim, args.dimension)

        self.bilstm_before_coattention = PackedLSTM(args.dimension,
                                                    args.dimension,
                                                    batch_first=True,
                                                    dropout=args.dropout_ratio,
                                                    bidirectional=True,
                                                    num_layers=1)
        self.coattention = CoattentiveLayer(args.dimension, dropout=0.3)
        dim = 2 * args.dimension + args.dimension + args.dimension

        self.context_bilstm_after_coattention = PackedLSTM(
            dim,
            args.dimension,
            batch_first=True,
            dropout=args.dropout_ratio,
            bidirectional=True,
            num_layers=args.rnn_layers)
        self.self_attentive_encoder_context = TransformerEncoder(
            args.dimension, args.transformer_heads, args.transformer_hidden,
            args.transformer_layers, args.dropout_ratio)
        self.bilstm_context = PackedLSTM(args.dimension,
                                         args.dimension,
                                         batch_first=True,
                                         dropout=args.dropout_ratio,
                                         bidirectional=True,
                                         num_layers=args.rnn_layers)

        self.question_bilstm_after_coattention = PackedLSTM(
            dim,
            args.dimension,
            batch_first=True,
            dropout=args.dropout_ratio,
            bidirectional=True,
            num_layers=args.rnn_layers)
        self.self_attentive_encoder_question = TransformerEncoder(
            args.dimension, args.transformer_heads, args.transformer_hidden,
            args.transformer_layers, args.dropout_ratio)
        self.bilstm_question = PackedLSTM(args.dimension,
                                          args.dimension,
                                          batch_first=True,
                                          dropout=args.dropout_ratio,
                                          bidirectional=True,
                                          num_layers=args.rnn_layers)

        self.self_attentive_decoder = TransformerDecoder(
            args.dimension, args.transformer_heads, args.transformer_hidden,
            args.transformer_layers, args.dropout_ratio)
        self.dual_ptr_rnn_decoder = DualPtrRNNDecoder(
            args.dimension,
            args.dimension,
            dropout=args.dropout_ratio,
            num_layers=args.rnn_layers)

        self.generative_vocab_size = min(len(field.vocab),
                                         args.max_generative_vocab)
        self.out = nn.Linear(args.dimension, self.generative_vocab_size)

        self.dropout = nn.Dropout(0.4)
Example #8
    def load(self, vectors):
        self.model = MTLSTM(n_vocab=len(vectors.keys()), vectors=vectors)
        self.model.cuda()
Example #9
import torch
from torchtext import data
from torchtext import datasets

from cove import MTLSTM

inputs = data.Field(lower=True, include_lengths=True, batch_first=True)
answers = data.Field(sequential=False)

print('Generating train, dev, test splits')
train, dev, test = datasets.SNLI.splits(inputs, answers)

print('Building vocabulary')
inputs.build_vocab(train, dev, test)
inputs.vocab.load_vectors(wv_type='glove.840B', wv_dim=300)
answers.build_vocab(train)

model = MTLSTM(n_vocab=len(inputs.vocab), vectors=inputs.vocab.vectors)
model.cuda(0)

train_iter, dev_iter, test_iter = data.BucketIterator.splits(
    (train, dev, test), batch_size=100, device=0)

train_iter.init_epoch()
print('Generating CoVe')
for batch_idx, batch in enumerate(train_iter):
    model.train()
    cove_premise = model(*batch.premise)
    cove_hypothesis = model(*batch.hypothesis)
Example #10
    def __init__(self, field, args):
        super().__init__()
        self.field = field
        self.args = args
        self.pad_idx = self.field.vocab.stoi[self.field.pad_token]
        def dp(args):
            return args.dropout_ratio if args.rnn_layers > 1 else 0.

        if self.args.glove_and_char:
        
            self.encoder_embeddings = Embedding(field, args.dimension, 
                dropout=args.dropout_ratio, project=not args.cove)
    
            if self.args.cove or self.args.intermediate_cove:
                self.cove = MTLSTM(model_cache=args.embeddings, layer0=args.intermediate_cove, layer1=args.cove)
                cove_params = get_trainable_params(self.cove) 
                for p in cove_params:
                    p.requires_grad = False
                cove_dim = int(args.intermediate_cove) * 600 + int(args.cove) * 600 + 400 # the last 400 is for GloVe and char n-gram embeddings
                self.project_cove = Feedforward(cove_dim, args.dimension)

        if -1 not in self.args.elmo:
            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
            weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"
            self.elmo = Elmo(options_file, weight_file, 3, dropout=0.0, do_layer_norm=False)
            elmo_params = get_trainable_params(self.elmo)
            for p in elmo_params:
                p.requires_grad = False
            elmo_dim = 1024 * len(self.args.elmo)
            self.project_elmo = Feedforward(elmo_dim, args.dimension)
            if self.args.glove_and_char:
                self.project_embeddings = Feedforward(2 * args.dimension, args.dimension, dropout=0.0)
        
        self.decoder_embeddings = Embedding(field, args.dimension, 
            dropout=args.dropout_ratio, project=True)
    
        self.bilstm_before_coattention = PackedLSTM(args.dimension,  args.dimension,
            batch_first=True, bidirectional=True, num_layers=1, dropout=0)
        self.coattention = CoattentiveLayer(args.dimension, dropout=0.3)
        dim = 2*args.dimension + args.dimension + args.dimension

        self.context_bilstm_after_coattention = PackedLSTM(dim, args.dimension,
            batch_first=True, dropout=dp(args), bidirectional=True, 
            num_layers=args.rnn_layers)
        self.self_attentive_encoder_context = TransformerEncoder(args.dimension, args.transformer_heads, args.transformer_hidden, args.transformer_layers, args.dropout_ratio)
        self.bilstm_context = PackedLSTM(args.dimension, args.dimension,
            batch_first=True, dropout=dp(args), bidirectional=True, 
            num_layers=args.rnn_layers)

        self.question_bilstm_after_coattention = PackedLSTM(dim, args.dimension,
            batch_first=True, dropout=dp(args), bidirectional=True, 
            num_layers=args.rnn_layers)
        self.self_attentive_encoder_question = TransformerEncoder(args.dimension, args.transformer_heads, args.transformer_hidden, args.transformer_layers, args.dropout_ratio)
        self.bilstm_question = PackedLSTM(args.dimension, args.dimension,
            batch_first=True, dropout=dp(args), bidirectional=True, 
            num_layers=args.rnn_layers)

        self.self_attentive_decoder = TransformerDecoder(args.dimension, args.transformer_heads, args.transformer_hidden, args.transformer_layers, args.dropout_ratio)
        self.dual_ptr_rnn_decoder = DualPtrRNNDecoder(args.dimension, args.dimension,
            dropout=args.dropout_ratio, num_layers=args.rnn_layers)

        self.generative_vocab_size = min(len(field.vocab), args.max_generative_vocab)
        self.out = nn.Linear(args.dimension, self.generative_vocab_size)

        self.dropout = nn.Dropout(0.4)
Example #11
class RnnDocReader(nn.Module):
    """Network for the Document Reader module of DrQA."""
    RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU, 'rnn': nn.RNN}

    def __init__(self, opt, padding_idx=0, embedding=None, normalize_emb=False,
                 embedding_order=True):
        super(RnnDocReader, self).__init__()
        # Store config
        self.opt = opt
        '''
        # Word embeddings
        if opt['pretrained_words']:
            assert embedding is not None
            self.embedding = nn.Embedding(embedding.size(0),
                                          embedding.size(1),
                                          padding_idx=padding_idx)
            if normalize_emb: normalize_emb_(embedding)
            self.embedding.weight.data = embedding

            if opt['fix_embeddings']:
                assert opt['tune_partial'] == 0
                for p in self.embedding.parameters():
                    p.requires_grad = False
            elif opt['tune_partial'] > 0:
                assert opt['tune_partial'] + 2 < embedding.size(0)
                fixed_embedding = embedding[opt['tune_partial'] + 2:]
                self.register_buffer('fixed_embedding', fixed_embedding)
                self.fixed_embedding = fixed_embedding
        else:  # random initialized
            self.embedding = nn.Embedding(opt['vocab_size'],
                                          opt['embedding_dim'],
                                          padding_idx=padding_idx)
        '''
        if opt['pos']:
            self.pos_embedding = nn.Embedding(opt['pos_size'], opt['pos_dim'])
            if normalize_emb: normalize_emb_(self.pos_embedding.weight.data)
        if opt['ner']:
            self.ner_embedding = nn.Embedding(opt['ner_size'], opt['ner_dim'])
            if normalize_emb: normalize_emb_(self.ner_embedding.weight.data)
        # Projection for attention weighted question
        if opt['use_qemb']:
            self.qemb_match = layers.SeqAttnMatch(3 * opt['embedding_dim'])
        if opt['use_cove']:
            self.cove_embedding = MTLSTM(n_vocab=embedding.size(0),
                                         vectors=embedding.clone(),
                                         residual_embeddings=True)
            if not opt['fine_tune']:
                for p in self.cove_embedding.parameters():
                    p.requires_grad = False

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = opt['embedding_dim'] + opt['num_features']
        question_input_size = opt['embedding_dim']
        if opt['use_qemb']:
            doc_input_size += opt['embedding_dim']
        if opt['pos']:
            doc_input_size += opt['pos_dim']
        if opt['ner']:
            doc_input_size += opt['ner_dim']
        if opt['use_cove']:
            # for CoVe
            doc_input_size += 2 * opt['embedding_dim']
            question_input_size += 2 * opt['embedding_dim']

        print('doc_input_size:', doc_input_size)
        self.attention_rnns = custom.AttentionRNN(
            opt,
            doc_input_size=doc_input_size,
            question_input_size=question_input_size,
            ratio=opt['reduction_ratio'])

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * opt['hidden_size'] + opt['hidden_size'] // opt['reduction_ratio']
        question_hidden_size = 2 * opt['hidden_size'] + opt['hidden_size'] // opt['reduction_ratio']

        # Question merging
        if opt['question_merge'] not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' % opt['question_merge'])
        if opt['question_merge'] == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)


        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )

    def forward(self, x1, x1_f, x1_pos, x1_ner, x1_mask, x2, x2_mask,
                x1_order, x2_order):
        """Inputs:
        x1 = document word indices             [batch * len_d]
        x1_f = document word features indices  [batch * len_d * nfeat]
        x1_pos = document POS tags             [batch * len_d]
        x1_ner = document entity tags          [batch * len_d]
        x1_mask = document padding mask        [batch * len_d]
        x2 = question word indices             [batch * len_q]
        x2_mask = question padding mask        [batch * len_q]
        """

        # Embed both document and question
        #x1_emb = self.embedding(x1)
        if self.opt['use_cove']:
            x1_emb_cove = self.cove_embedding(
                x1, torch.LongTensor(x1.size(0)).fill_(x1.size(1)).cuda())
        #x1_emb_order = self.embedding_order(x1_order)

        #x2_emb = self.embedding(x2)
        if self.opt['use_cove']:
            x2_emb_cove = self.cove_embedding(
                x2, torch.LongTensor(x2.size(0)).fill_(x2.size(1)).cuda())
        #x2_emb += self.embedding_order(x2_order)

        '''
        if self.opt['dropout_emb'] > 0:
            x1_emb = nn.functional.dropout(x1_emb, p=self.opt['dropout_emb'],
                                               training=self.training)
            x2_emb = nn.functional.dropout(x2_emb, p=self.opt['dropout_emb'],
                                           training=self.training)

        x2_emb = torch.cat([x2_emb, x2_emb_cove], dim=2)
        x1_emb = torch.cat([x1_emb, x1_emb_cove], dim=2)
        '''

        # Note: this variant feeds the CoVe (GloVe-residual) embeddings
        # directly, so it assumes opt['use_cove'] is enabled.
        x2_emb = x2_emb_cove
        x1_emb = x1_emb_cove


        drnn_input_list = [x1_emb, x1_f]
        # Add attention-weighted question representation
        if self.opt['use_qemb']:
            x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
            drnn_input_list.append(x2_weighted_emb)
        if self.opt['pos']:
            x1_pos_emb = self.pos_embedding(x1_pos)
            if self.opt['dropout_emb'] > 0:
                x1_pos_emb = nn.functional.dropout(x1_pos_emb, p=self.opt['dropout_emb'],
                                               training=self.training)
            drnn_input_list.append(x1_pos_emb)
        if self.opt['ner']:
            x1_ner_emb = self.ner_embedding(x1_ner)
            if self.opt['dropout_emb'] > 0:
                x1_ner_emb = nn.functional.dropout(x1_ner_emb, p=self.opt['dropout_emb'],
                                               training=self.training)
            drnn_input_list.append(x1_ner_emb)
        drnn_input = torch.cat(drnn_input_list, 2)
        #print('drnn_input:',drnn_input.size())
        # Encode document with RNN
        doc_hiddens, question_hiddens = self.attention_rnns(
            drnn_input, x1_mask, x2_emb, x2_mask)
        if self.opt['question_merge'] == 'avg':
            q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
        elif self.opt['question_merge'] == 'self_attn':
            q_merge_weights = self.self_attn(question_hiddens, x2_mask)
        question_hidden = layers.weighted_avg(question_hiddens, q_merge_weights)

        start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
        end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
        return start_scores, end_scores
Example #12
    def __init__(self, args):
        super(FusionNetReader, self).__init__()
        # Store config
        self.args = args

        # Word embeddings (+1 for padding)
        self.embedding = nn.Embedding(args.vocab_size,
                                      args.embedding_dim,
                                      padding_idx=0)
        if args.use_cove and args.embedding_dim == 300:
            # init cove_encoder without additional embeddings
            self.cove_encoder = MTLSTM()  # 300
            for p in self.cove_encoder.parameters():
                p.requires_grad = False

        if args.use_qemb:
            self.qemb_match = layers.SeqAttnMatch(args.embedding_dim)

        # Input size to RNN: word emb + cove emb + manual features + question emb
        doc_input_size = args.embedding_dim + args.num_features
        question_input_size = args.embedding_dim
        if args.use_cove:
            doc_input_size += 2 * args.cove_embedding_dim
            question_input_size += 2 * args.cove_embedding_dim
        if args.use_qemb:
            doc_input_size += args.embedding_dim

        # Reading component (low-level layer)
        self.reading_low_level_doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        self.reading_low_level_question_rnn = layers.StackedBRNN(
            input_size=question_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        # Reading component (high-level layer)
        self.reading_high_level_doc_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 2,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        self.reading_high_level_question_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 2,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        # Question understanding component
        # input: [low_level_question, high_level_question]
        self.understanding_question_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 4,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        # [word_embedding, cove_embedding, low_level_doc_hidden, high_level_doc_hidden]
        history_of_word_size = args.embedding_dim + 2 * args.cove_embedding_dim + 4 * args.hidden_size

        # self.low_level_matrix_attention = MatrixAttention(SymmetricBilinearSimilarity(history_of_word_size,
        #                                                                               args.attention_size,
        #                                                                               F.relu))
        # self.high_level_matrix_attention = MatrixAttention(SymmetricBilinearSimilarity(history_of_word_size,
        #                                                                                args.attention_size,
        #                                                                                F.relu))
        # self.understanding_matrix_attention = MatrixAttention(SymmetricBilinearSimilarity(history_of_word_size,
        #                                                                                   args.attention_size,
        #                                                                                   F.relu))

        # self.low_level_matrix_attention = MatrixAttention(BilinearSimilarity(history_of_word_size,
        #                                                                      history_of_word_size))
        # self.high_level_matrix_attention = MatrixAttention(BilinearSimilarity(history_of_word_size,
        #                                                                       history_of_word_size))
        # self.understanding_matrix_attention = MatrixAttention(BilinearSimilarity(history_of_word_size,
        #                                                                          history_of_word_size))

        self.low_level_matrix_attention_layer = layers.SymBilinearAttnMatch(history_of_word_size,
                                                                            args.attention_size)
        self.high_level_matrix_attention_layer = layers.SymBilinearAttnMatch(history_of_word_size,
                                                                             args.attention_size)
        self.understanding_matrix_attention_layer = layers.SymBilinearAttnMatch(history_of_word_size,
                                                                                args.attention_size)

        # Multi-level rnn
        # input: [low_level_doc, high_level_doc, low_level_fusion_doc, high_level_fusion_doc,
        # understanding_level_question_fusion_doc]
        self.multi_level_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 2 * 5,
            hidden_size=args.hidden_size,
            num_layers=1,
            padding=args.rnn_padding
        )

        # [word_embedding, cove_embedding, low_level_doc_hidden, high_level_doc_hidden, low_level_doc_question_vector,
        # high_level_doc_question_vector, understanding_doc_question_vector, fa_multi_level_doc_hidden]
        history_of_doc_word_size = history_of_word_size + 4 * 2 * args.hidden_size

        # self.self_boosted_matrix_attention = MatrixAttention(SymmetricBilinearSimilarity(history_of_doc_word_size,
        #                                                                                  args.attention_size,
        #                                                                                  F.relu))

        self.self_boosted_matrix_attention_layer = layers.SymBilinearAttnMatch(history_of_doc_word_size,
                                                                               args.attention_size)

        #
        # self.self_boosted_matrix_attention = MatrixAttention(BilinearSimilarity(history_of_doc_word_size,
        #                                                                         history_of_doc_word_size))
        # Fully-Aware Self-Boosted fusion rnn
        # input: [fully_aware_encoded_doc(hidden state from last layer) ,self_boosted_fusion_doc]
        self.understanding_doc_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 2 * 2,
            hidden_size=args.hidden_size,
            num_layers=1,
            padding=args.rnn_padding
        )

        # Output sizes of rnn
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        if args.concat_rnn_layers:
            doc_hidden_size *= args.doc_layers
            question_hidden_size *= args.question_layers

        # Question merging
        self.question_self_attn = layers.LinearSeqAttn(question_hidden_size)

        self.start_attn = layers.BilinearSeqAttn(doc_hidden_size, question_hidden_size, log_normalize=False)

        self.start_gru = nn.GRU(doc_hidden_size, args.hidden_size * 2, batch_first=True)

        self.end_attn = layers.BilinearSeqAttn(doc_hidden_size, question_hidden_size, log_normalize=False)
Example #13
class FusionNetReader(nn.Module):
    RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU, 'rnn': nn.RNN}

    def __init__(self, args):
        super(FusionNetReader, self).__init__()
        # Store config
        self.args = args

        # Word embeddings (+1 for padding)
        self.embedding = nn.Embedding(args.vocab_size,
                                      args.embedding_dim,
                                      padding_idx=0)
        if args.use_cove and args.embedding_dim == 300:
            # init cove_encoder without additional embeddings
            self.cove_encoder = MTLSTM()  # 300
            for p in self.cove_encoder.parameters():
                p.requires_grad = False

        if args.use_qemb:
            self.qemb_match = layers.SeqAttnMatch(args.embedding_dim)

        # Input size to RNN: word emb + cove emb + manual features + question emb
        doc_input_size = args.embedding_dim + args.num_features
        question_input_size = args.embedding_dim
        if args.use_cove:
            doc_input_size += 2 * args.cove_embedding_dim
            question_input_size += 2 * args.cove_embedding_dim
        if args.use_qemb:
            doc_input_size += args.embedding_dim

        # Reading component (low-level layer)
        self.reading_low_level_doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        self.reading_low_level_question_rnn = layers.StackedBRNN(
            input_size=question_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        # Reading component (high-level layer)
        self.reading_high_level_doc_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 2,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        self.reading_high_level_question_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 2,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        # Question understanding component
        # input: [low_level_question, high_level_question]
        self.understanding_question_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 4,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            padding=args.rnn_padding
        )

        # [word_embedding, cove_embedding, low_level_doc_hidden, high_level_doc_hidden]
        history_of_word_size = args.embedding_dim + 2 * args.cove_embedding_dim + 4 * args.hidden_size

        # self.low_level_matrix_attention = MatrixAttention(SymmetricBilinearSimilarity(history_of_word_size,
        #                                                                               args.attention_size,
        #                                                                               F.relu))
        # self.high_level_matrix_attention = MatrixAttention(SymmetricBilinearSimilarity(history_of_word_size,
        #                                                                                args.attention_size,
        #                                                                                F.relu))
        # self.understanding_matrix_attention = MatrixAttention(SymmetricBilinearSimilarity(history_of_word_size,
        #                                                                                   args.attention_size,
        #                                                                                   F.relu))

        # self.low_level_matrix_attention = MatrixAttention(BilinearSimilarity(history_of_word_size,
        #                                                                      history_of_word_size))
        # self.high_level_matrix_attention = MatrixAttention(BilinearSimilarity(history_of_word_size,
        #                                                                       history_of_word_size))
        # self.understanding_matrix_attention = MatrixAttention(BilinearSimilarity(history_of_word_size,
        #                                                                          history_of_word_size))

        self.low_level_matrix_attention_layer = layers.SymBilinearAttnMatch(history_of_word_size,
                                                                            args.attention_size)
        self.high_level_matrix_attention_layer = layers.SymBilinearAttnMatch(history_of_word_size,
                                                                             args.attention_size)
        self.understanding_matrix_attention_layer = layers.SymBilinearAttnMatch(history_of_word_size,
                                                                                args.attention_size)

        # Multi-level rnn
        # input: [low_level_doc, high_level_doc, low_level_fusion_doc, high_level_fusion_doc,
        # understanding_level_question_fusion_doc]
        self.multi_level_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 2 * 5,
            hidden_size=args.hidden_size,
            num_layers=1,
            padding=args.rnn_padding
        )

        # [word_embedding, cove_embedding, low_level_doc_hidden, high_level_doc_hidden, low_level_doc_question_vector,
        # high_level_doc_question_vector, understanding_doc_question_vector, fa_multi_level_doc_hidden]
        history_of_doc_word_size = history_of_word_size + 4 * 2 * args.hidden_size

        # self.self_boosted_matrix_attention = MatrixAttention(SymmetricBilinearSimilarity(history_of_doc_word_size,
        #                                                                                  args.attention_size,
        #                                                                                  F.relu))

        self.self_boosted_matrix_attention_layer = layers.SymBilinearAttnMatch(history_of_doc_word_size,
                                                                               args.attention_size)

        #
        # self.self_boosted_matrix_attention = MatrixAttention(BilinearSimilarity(history_of_doc_word_size,
        #                                                                         history_of_doc_word_size))
        # Fully-Aware Self-Boosted fusion rnn
        # input: [fully_aware_encoded_doc(hidden state from last layer) ,self_boosted_fusion_doc]
        self.understanding_doc_rnn = layers.StackedBRNN(
            input_size=args.hidden_size * 2 * 2,
            hidden_size=args.hidden_size,
            num_layers=1,
            padding=args.rnn_padding
        )

        # Output sizes of rnn
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        if args.concat_rnn_layers:
            doc_hidden_size *= args.doc_layers
            question_hidden_size *= args.question_layers

        # Question merging
        self.question_self_attn = layers.LinearSeqAttn(question_hidden_size)

        self.start_attn = layers.BilinearSeqAttn(doc_hidden_size, question_hidden_size, log_normalize=False)

        self.start_gru = nn.GRU(doc_hidden_size, args.hidden_size * 2, batch_first=True)

        self.end_attn = layers.BilinearSeqAttn(doc_hidden_size, question_hidden_size, log_normalize=False)

    def forward(self, x1, x1_f, x1_mask, x2, x2_mask):
        """Inputs:
        x1 = document word indices             [batch * len_d]
        x1_mask = document padding mask        [batch * len_d]
        x1_f = document word features indices  [batch * len_d * nfeat]
        x2 = question word indices             [batch * len_q]
        x2_mask = question padding mask        [batch * len_q]
        """
        # Embed both document and question
        x1_word_emb = self.embedding(x1)  # [batch, len_d, embedding_dim]
        x2_word_emb = self.embedding(x2)  # [batch, len_q, embedding_dim]

        x1_lengths = x1_mask.data.eq(0).long().sum(1).squeeze()  # batch
        x2_lengths = x2_mask.data.eq(0).long().sum(1).squeeze()  # batch

        x1_cove_emb = self.cove_encoder(x1_word_emb, x1_lengths)
        x2_cove_emb = self.cove_encoder(x2_word_emb, x2_lengths)

        x1_emb = torch.cat([x1_word_emb, x1_cove_emb], dim=-1)
        x2_emb = torch.cat([x2_word_emb, x2_cove_emb], dim=-1)

        # Dropout on embeddings
        if self.args.dropout_emb > 0:
            x1_emb = nn.functional.dropout(x1_emb, p=self.args.dropout_emb,
                                           training=self.training)
            x2_emb = nn.functional.dropout(x2_emb, p=self.args.dropout_emb,
                                           training=self.training)
        # Form document encoding inputs
        drnn_input = [x1_emb]

        # Add attention-weighted question representation
        if self.args.use_qemb:
            x2_weighted_emb = self.qemb_match(x1_word_emb, x2_word_emb, x2_mask)  # batch * len_d
            drnn_input.append(x2_weighted_emb)

        # Add manual features
        if self.args.num_features > 0:
            drnn_input.append(x1_f)

        # Encode document with RNN shape: [batch, len_d, 2*hidden_size]
        low_level_doc_hiddens = self.reading_low_level_doc_rnn(torch.cat(drnn_input, 2), x1_mask)
        low_level_question_hiddens = self.reading_low_level_question_rnn(x2_emb, x2_mask)

        # Encode question with RNN shape: [batch, len_q, 2*hidden_size]
        high_level_doc_hiddens = self.reading_high_level_doc_rnn(low_level_doc_hiddens, x1_mask)
        high_level_question_hiddens = self.reading_high_level_question_rnn(low_level_question_hiddens, x2_mask)

        # Encode low_level_question_hiddens and high_level_question_hiddens shape:[batch, len_q, 2*hidden_size]
        understanding_question_hiddens = self.understanding_question_rnn(torch.cat([low_level_question_hiddens,
                                                                                    high_level_question_hiddens], 2),
                                                                         x2_mask)

        # history of word shape:[batch, len_d, history_of_word_size]
        history_of_doc_word = torch.cat(
            [x1_word_emb, x1_cove_emb, low_level_doc_hiddens, high_level_doc_hiddens],
            dim=2)
        # history of word shape: [batch, len_q, history_of_word_size]
        # (mirrors the document side: low- and high-level question hiddens)
        history_of_question_word = torch.cat([x2_word_emb, x2_cove_emb, low_level_question_hiddens,
                                              high_level_question_hiddens], dim=2)
        # # high_level_doc_hiddens
        # # fully-aware multi-level attention
        # low_level_similarity = self.low_level_matrix_attention(history_of_doc_word, history_of_question_word)
        # high_level_similarity = self.high_level_matrix_attention(history_of_doc_word, history_of_question_word)
        # understanding_similarity = self.understanding_matrix_attention(history_of_doc_word, history_of_question_word)
        #
        # # shape: [batch, len_d, len_q]
        # low_level_norm_sim = util.last_dim_softmax(low_level_similarity, x2_mask)
        # high_level_norm_sim = util.last_dim_softmax(high_level_similarity, x2_mask)
        # understanding_norm_sim = util.last_dim_softmax(understanding_similarity, x2_mask)
        #
        # # shape: [batch, len_d, 2*hidden_size]
        # low_level_doc_question_vectors = util.weighted_sum(low_level_question_hiddens, low_level_norm_sim)
        # high_level_doc_question_vectors = util.weighted_sum(high_level_question_hiddens, high_level_norm_sim)
        # understanding_doc_question_vectors = util.weighted_sum(understanding_question_hiddens, understanding_norm_sim)

        low_level_doc_question_vectors = self.low_level_matrix_attention_layer(
            history_of_doc_word, history_of_question_word, x2_mask, low_level_question_hiddens)
        high_level_doc_question_vectors = self.high_level_matrix_attention_layer(
            history_of_doc_word, history_of_question_word, x2_mask, high_level_question_hiddens)
        understanding_doc_question_vectors = self.understanding_matrix_attention_layer(
            history_of_doc_word, history_of_question_word, x2_mask, understanding_question_hiddens)


        # Encode multi-level hiddens and vectors
        fa_multi_level_doc_hiddens = self.multi_level_rnn(torch.cat([low_level_doc_hiddens, high_level_doc_hiddens,
                                                                     low_level_doc_question_vectors,
                                                                     high_level_doc_question_vectors,
                                                                     understanding_doc_question_vectors], dim=2),
                                                          x1_mask)
        # fa_multi_level_doc_hiddens = low_level_doc_question_vectors
        #
        history_of_doc_word2 = torch.cat([x1_word_emb, x1_cove_emb, low_level_doc_hiddens, high_level_doc_hiddens,
                                          low_level_doc_question_vectors, high_level_doc_question_vectors,
                                          understanding_doc_question_vectors, fa_multi_level_doc_hiddens], dim=2)

        # # shape: [batch, len_d, len_d]
        # self_boosted_similarity = self.self_boosted_matrix_attention(history_of_doc_word2, history_of_doc_word2)
        #
        # # shape: [batch, len_d, len_d]
        # self_boosted_norm_sim = util.last_dim_softmax(self_boosted_similarity, x1_mask)
        #
        # # shape: [batch, len_d, 2*hidden_size]
        # self_boosted_vectors = util.weighted_sum(fa_multi_level_doc_hiddens, self_boosted_norm_sim)

        self_boosted_vectors = self.self_boosted_matrix_attention_layer(
            history_of_doc_word2, history_of_doc_word2, x1_mask, fa_multi_level_doc_hiddens)


        # Encode vectors and hiddens
        # shape: [batch, len_d, 2*hidden_size]
        understanding_doc_hiddens = self.understanding_doc_rnn(torch.cat([fa_multi_level_doc_hiddens,
                                                                          self_boosted_vectors], dim=2), x1_mask)

        # understanding_doc_hiddens = fa_multi_level_doc_hiddens

        # shape: [batch, len_q]
        q_merge_weights = self.question_self_attn(understanding_question_hiddens, x2_mask)
        # shape: [batch, 2*hidden_size]
        question_hidden = layers.weighted_avg(understanding_question_hiddens, q_merge_weights)

        # Predict start and end positions
        # shape: [batch, len_d]  SOFTMAX NOT LOG_SOFTMAX
        start_scores = self.start_attn(understanding_doc_hiddens, question_hidden, x1_mask)
        # shape: [batch, 2*hidden_size]
        gru_input = layers.weighted_avg(understanding_doc_hiddens, start_scores)
        # shape: [batch, 1, 2*hidden_size]
        gru_input = gru_input.unsqueeze(1)
        # shape: [1, batch, 2*hidden_size]
        question_hidden = question_hidden.unsqueeze(0)
        _, memory_hidden = self.start_gru(gru_input, question_hidden)
        # shape: [batch, 2*hidden_size]
        memory_hidden = memory_hidden.squeeze(0)
        # shape: [batch, len_d]
        end_scores = self.end_attn(understanding_doc_hiddens, memory_hidden, x1_mask)
        # log start_scores
        if self.training:
            start_scores = torch.log(start_scores.add(1e-8))
            end_scores = torch.log(end_scores.add(1e-8))
        return start_scores, end_scores
Example #14
parser.add_argument('--data', default='.data', help='where to store data')
parser.add_argument('--embeddings', default='.embeddings', help='where to store embeddings')
args = parser.parse_args()

inputs = data.Field(lower=True, include_lengths=True, batch_first=True)

print('Generating train, dev, test splits')
train, dev, test = datasets.IWSLT.splits(root=args.data, exts=['.en', '.de'], fields=[inputs, inputs])
train_iter, dev_iter, test_iter = data.Iterator.splits(
            (train, dev, test), batch_size=100, device=torch.device(args.device) if args.device >= 0 else None)

print('Building vocabulary')
inputs.build_vocab(train, dev, test)
inputs.vocab.load_vectors(vectors=GloVe(name='840B', dim=300, cache=args.embeddings))

outputs_last_layer_cove = MTLSTM(n_vocab=len(inputs.vocab), vectors=inputs.vocab.vectors, model_cache=args.embeddings)
outputs_both_layer_cove = MTLSTM(n_vocab=len(inputs.vocab), vectors=inputs.vocab.vectors, layer0=True, model_cache=args.embeddings)
outputs_both_layer_cove_with_glove = MTLSTM(n_vocab=len(inputs.vocab), vectors=inputs.vocab.vectors, layer0=True, residual_embeddings=True, model_cache=args.embeddings)

if args.device >= 0:
    outputs_last_layer_cove.cuda()
    outputs_both_layer_cove.cuda()
    outputs_both_layer_cove_with_glove.cuda()

train_iter.init_epoch()
print('Generating CoVe')
for batch_idx, batch in enumerate(train_iter):
    if batch_idx > 0:
        break
    last_layer_cove = outputs_last_layer_cove(*batch.src)
    print(last_layer_cove.size())
Example #15
class RnnDocReader(nn.Module):
    """Network for the Document Reader module of DrQA."""
    RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU, 'rnn': nn.RNN}

    def __init__(self,
                 opt,
                 padding_idx=0,
                 embedding=None,
                 normalize_emb=False,
                 embedding_order=True):
        super(RnnDocReader, self).__init__()
        # Store config
        self.opt = opt

        # Word embeddings
        if opt['pretrained_words']:
            assert embedding is not None
            self.embedding = nn.Embedding(embedding.size(0),
                                          embedding.size(1),
                                          padding_idx=padding_idx)
            if normalize_emb: normalize_emb_(embedding)
            self.embedding.weight.data = embedding

            if opt['fix_embeddings']:
                assert opt['tune_partial'] == 0
                for p in self.embedding.parameters():
                    p.requires_grad = False
            elif opt['tune_partial'] > 0:
                assert opt['tune_partial'] + 2 < embedding.size(0)
                fixed_embedding = embedding[opt['tune_partial'] + 2:]
                self.register_buffer('fixed_embedding', fixed_embedding)
                self.fixed_embedding = fixed_embedding
        else:  # random initialized
            self.embedding = nn.Embedding(opt['vocab_size'],
                                          opt['embedding_dim'],
                                          padding_idx=padding_idx)
        if opt['pos']:
            self.pos_embedding = nn.Embedding(opt['pos_size'], opt['pos_dim'])
            if normalize_emb: normalize_emb_(self.pos_embedding.weight.data)
        if opt['ner']:
            self.ner_embedding = nn.Embedding(opt['ner_size'], opt['ner_dim'])
            if normalize_emb: normalize_emb_(self.ner_embedding.weight.data)
        # Projection for attention weighted question
        if opt['use_qemb']:
            self.qemb_match = layers.SeqAttnMatch(3 * opt['embedding_dim'])
        if opt['use_cove']:
            self.cove_embedding = MTLSTM(n_vocab=embedding.size(0),
                                         vectors=embedding.clone())
            if not opt['fine_tune']:
                for p in self.cove_embedding.parameters():
                    p.requires_grad = False

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = opt['embedding_dim'] + opt['num_features']
        question_input_size = opt['embedding_dim']
        if opt['use_qemb']:
            doc_input_size += opt['embedding_dim']
        if opt['pos']:
            doc_input_size += opt['pos_dim']
        if opt['ner']:
            doc_input_size += opt['ner_dim']
        if opt['use_cove']:
            # for Cove
            doc_input_size += 2 * opt['embedding_dim']
            question_input_size += 2 * opt['embedding_dim']

        print('doc_input_size:', doc_input_size)
        self.attention_rnns = custom.AttentionRNN(
            opt,
            doc_input_size=doc_input_size,
            question_input_size=question_input_size,
            ratio=opt['reduction_ratio'])

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * opt['hidden_size'] + opt['hidden_size'] // opt[
            'reduction_ratio']
        question_hidden_size = 2 * opt['hidden_size'] + opt[
            'hidden_size'] // opt['reduction_ratio']

        # Question merging
        if opt['question_merge'] not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' %
                                      opt['question_merge'])
        if opt['question_merge'] == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )

    def forward(self, x1, x1_f, x1_pos, x1_ner, x1_mask, x2, x2_mask, x1_order,
                x2_order):
        """Inputs:
        x1 = document word indices             [batch * len_d]
        x1_f = document word features indices  [batch * len_d * nfeat]
        x1_pos = document POS tags             [batch * len_d]
        x1_ner = document entity tags          [batch * len_d]
        x1_mask = document padding mask        [batch * len_d]
        x2 = question word indices             [batch * len_q]
        x2_mask = question padding mask        [batch * len_q]
        """
        # Embed both document and question
        x1_emb = self.embedding(x1)
        if self.opt['use_cove']:
            x1_emb_cove = self.cove_embedding(
                x1,
                torch.LongTensor(x1.size(0)).fill_(x1.size(1)).cuda())
        #x1_emb_order = self.embedding_order(x1_order)

        x2_emb = self.embedding(x2)
        if self.opt['use_cove']:
            x2_emb_cove = self.cove_embedding(
                x2,
                torch.LongTensor(x2.size(0)).fill_(x2.size(1)).cuda())
        #x2_emb += self.embedding_order(x2_order)

        if self.opt['dropout_emb'] > 0:
            x1_emb = nn.functional.dropout(x1_emb,
                                           p=self.opt['dropout_emb'],
                                           training=self.training)
            x2_emb = nn.functional.dropout(x2_emb,
                                           p=self.opt['dropout_emb'],
                                           training=self.training)

        x2_emb = torch.cat([x2_emb, x2_emb_cove], dim=2)
        x1_emb = torch.cat([x1_emb, x1_emb_cove], dim=2)

        drnn_input_list = [x1_emb, x1_f]
        # Add attention-weighted question representation
        if self.opt['use_qemb']:
            x2_weighted_emb = self.qemb_match(x1_emb, x2_emb, x2_mask)
            drnn_input_list.append(x2_weighted_emb)
        if self.opt['pos']:
            x1_pos_emb = self.pos_embedding(x1_pos)
            if self.opt['dropout_emb'] > 0:
                x1_pos_emb = nn.functional.dropout(x1_pos_emb,
                                                   p=self.opt['dropout_emb'],
                                                   training=self.training)
            drnn_input_list.append(x1_pos_emb)
        if self.opt['ner']:
            x1_ner_emb = self.ner_embedding(x1_ner)
            if self.opt['dropout_emb'] > 0:
                x1_ner_emb = nn.functional.dropout(x1_ner_emb,
                                                   p=self.opt['dropout_emb'],
                                                   training=self.training)
            drnn_input_list.append(x1_ner_emb)
        drnn_input = torch.cat(drnn_input_list, 2)
        #print('drnn_input:',drnn_input.size())
        # Encode document with RNN
        doc_hiddens, question_hiddens = self.attention_rnns(
            drnn_input, x1_mask, x2_emb, x2_mask)
        if self.opt['question_merge'] == 'avg':
            q_merge_weights = layers.uniform_weights(question_hiddens, x2_mask)
        elif self.opt['question_merge'] == 'self_attn':
            q_merge_weights = self.self_attn(question_hiddens, x2_mask)
        question_hidden = layers.weighted_avg(question_hiddens,
                                              q_merge_weights)

        start_scores = self.start_attn(doc_hiddens, question_hidden, x1_mask)
        end_scores = self.end_attn(doc_hiddens, question_hidden, x1_mask)
        return start_scores, end_scores
Example #16
    def __init__(self,
                 opt,
                 padding_idx=0,
                 embedding=None,
                 normalize_emb=False,
                 embedding_order=True):
        super(RnnDocReader, self).__init__()
        # Store config
        self.opt = opt

        # Word embeddings
        if opt['pretrained_words']:
            assert embedding is not None
            self.embedding = nn.Embedding(embedding.size(0),
                                          embedding.size(1),
                                          padding_idx=padding_idx)
            if normalize_emb: normalize_emb_(embedding)
            self.embedding.weight.data = embedding

            if opt['fix_embeddings']:
                assert opt['tune_partial'] == 0
                for p in self.embedding.parameters():
                    p.requires_grad = False
            elif opt['tune_partial'] > 0:
                assert opt['tune_partial'] + 2 < embedding.size(0)
                fixed_embedding = embedding[opt['tune_partial'] + 2:]
                self.register_buffer('fixed_embedding', fixed_embedding)
                self.fixed_embedding = fixed_embedding
        else:  # random initialized
            self.embedding = nn.Embedding(opt['vocab_size'],
                                          opt['embedding_dim'],
                                          padding_idx=padding_idx)
        if opt['pos']:
            self.pos_embedding = nn.Embedding(opt['pos_size'], opt['pos_dim'])
            if normalize_emb: normalize_emb_(self.pos_embedding.weight.data)
        if opt['ner']:
            self.ner_embedding = nn.Embedding(opt['ner_size'], opt['ner_dim'])
            if normalize_emb: normalize_emb_(self.ner_embedding.weight.data)
        # Projection for attention weighted question
        if opt['use_qemb']:
            self.qemb_match = layers.SeqAttnMatch(3 * opt['embedding_dim'])
        if opt['use_cove']:
            self.cove_embedding = MTLSTM(n_vocab=embedding.size(0),
                                         vectors=embedding.clone())
            if not opt['fine_tune']:
                for p in self.cove_embedding.parameters():
                    p.requires_grad = False

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = opt['embedding_dim'] + opt['num_features']
        question_input_size = opt['embedding_dim']
        if opt['use_qemb']:
            doc_input_size += opt['embedding_dim']
        if opt['pos']:
            doc_input_size += opt['pos_dim']
        if opt['ner']:
            doc_input_size += opt['ner_dim']
        if opt['use_cove']:
            # for CoVe
            doc_input_size += 2 * opt['embedding_dim']
            question_input_size += 2 * opt['embedding_dim']

        print('doc_input_size:', doc_input_size)
        self.attention_rnns = custom.AttentionRNN(
            opt,
            doc_input_size=doc_input_size,
            question_input_size=question_input_size,
            ratio=opt['reduction_ratio'])

        # Output sizes of rnn encoders
        doc_hidden_size = (2 * opt['hidden_size']
                           + opt['hidden_size'] // opt['reduction_ratio'])
        question_hidden_size = (2 * opt['hidden_size']
                                + opt['hidden_size'] // opt['reduction_ratio'])

        # Question merging
        if opt['question_merge'] not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' %
                                      opt['question_merge'])
        if opt['question_merge'] == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )

    def __init__(self,
                 opt,
                 padding_idx=0,
                 embedding=None,
                 normalize_emb=False,
                 embedding_order=True):
        super(RnnDocReader, self).__init__()
        # Store config
        self.opt = opt
        '''
        # Word embeddings
        if opt['pretrained_words']:
            assert embedding is not None
            self.embedding = nn.Embedding(embedding.size(0),
                                          embedding.size(1),
                                          padding_idx=padding_idx)
            if normalize_emb: normalize_emb_(embedding)
            self.embedding.weight.data = embedding

            if opt['fix_embeddings']:
                assert opt['tune_partial'] == 0
                for p in self.embedding.parameters():
                    p.requires_grad = False
            elif opt['tune_partial'] > 0:
                assert opt['tune_partial'] + 2 < embedding.size(0)
                fixed_embedding = embedding[opt['tune_partial'] + 2:]
                self.register_buffer('fixed_embedding', fixed_embedding)
                self.fixed_embedding = fixed_embedding
        else:  # random initialized
            self.embedding = nn.Embedding(opt['vocab_size'],
                                          opt['embedding_dim'],
                                          padding_idx=padding_idx)
        '''
        if opt['pos']:
            self.pos_embedding = nn.Embedding(opt['pos_size'], opt['pos_dim'])
            if normalize_emb: normalize_emb_(self.pos_embedding.weight.data)
        if opt['ner']:
            self.ner_embedding = nn.Embedding(opt['ner_size'], opt['ner_dim'])
            if normalize_emb: normalize_emb_(self.ner_embedding.weight.data)
        # Projection for attention weighted question
        if opt['use_qemb']:
            self.qemb_match = layers.SeqAttnMatch(3 * opt['embedding_dim'])
        if opt['use_cove']:
            self.cove_embedding = MTLSTM(n_vocab=embedding.size(0),
                                         vectors=embedding.clone(),
                                         residual_embeddings=True)
            if not opt['fine_tune']:
                for p in self.cove_embedding.parameters():
                    p.requires_grad = False

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = opt['embedding_dim'] + opt['num_features']
        question_input_size = opt['embedding_dim']
        if opt['use_qemb']:
            doc_input_size += opt['embedding_dim']
        if opt['pos']:
            doc_input_size += opt['pos_dim']
        if opt['ner']:
            doc_input_size += opt['ner_dim']
        if opt['use_cove']:
            # for CoVe
            doc_input_size += 2 * opt['embedding_dim']
            question_input_size += 2 * opt['embedding_dim']

        print('doc_input_size:', doc_input_size)
        self.attention_rnns = custom.AttentionRNN(
            opt,
            doc_input_size=doc_input_size,
            question_input_size=question_input_size,
            ratio=opt['reduction_ratio'])

        # Output sizes of rnn encoders
        doc_hidden_size = (2 * opt['hidden_size']
                           + opt['hidden_size'] // opt['reduction_ratio'])
        question_hidden_size = (2 * opt['hidden_size']
                                + opt['hidden_size'] // opt['reduction_ratio'])

        # Question merging
        if opt['question_merge'] not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' %
                                      opt['question_merge'])
        if opt['question_merge'] == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)


        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
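
For reference, here is how the input-size bookkeeping in the constructors above works out under a purely hypothetical configuration (300-dimensional word vectors, 12 POS dims, 8 NER dims, 4 manual features); the concrete numbers depend on opt, but enabling CoVe always adds 2 * embedding_dim dimensions to both the document and the question inputs.

# Hypothetical option values, chosen only to illustrate the size arithmetic above.
opt = {'embedding_dim': 300, 'num_features': 4, 'use_qemb': True,
       'pos': True, 'pos_dim': 12, 'ner': True, 'ner_dim': 8, 'use_cove': True}

doc_input_size = opt['embedding_dim'] + opt['num_features']   # 304
question_input_size = opt['embedding_dim']                    # 300
if opt['use_qemb']:
    doc_input_size += opt['embedding_dim']                    # 604
if opt['pos']:
    doc_input_size += opt['pos_dim']                          # 616
if opt['ner']:
    doc_input_size += opt['ner_dim']                          # 624
if opt['use_cove']:
    doc_input_size += 2 * opt['embedding_dim']                # 1224 (CoVe adds 600-d vectors)
    question_input_size += 2 * opt['embedding_dim']           # 900

print(doc_input_size, question_input_size)                    # 1224 900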
Exemple #18
0
# Install CoVe

!python setup.py develop

import torch
from torchtext.vocab import GloVe
from cove import MTLSTM

# GloVe vocabulary; the vector matrix has shape roughly 2.2 * 10^6 x 300

glove = GloVe(name='840B', dim=300, cache='.embeddings')

# Two input sentences; each entry is the index of a word in the vocabulary

inputs = torch.LongTensor([[10, 2, 3, 0], [7, 8, 10, 3]]);inputs

# The two sentences have lengths 3 and 4, respectively

lengths = torch.LongTensor([3, 4]);lengths

# The CoVe encoder (MTLSTM)

cove = MTLSTM(n_vocab=glove.vectors.shape[0], vectors=glove.vectors, model_cache='.embeddings')

# CoVe encoding for every word of each sentence; shape 2 x 4 x 600

outputs = cove(inputs, lengths);outputs

outputs.shape
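
The indices in the cell above are arbitrary placeholders. Below is a minimal sketch of building real inputs from tokenized sentences, reusing the glove and cove objects defined above and assuming every token appears in glove.stoi (padding with index 0, as in the example).

sentences = [['the', 'cat', 'sat'], ['a', 'dog', 'chased', 'it']]
max_len = max(len(s) for s in sentences)

# Look up each word's GloVe index and pad the shorter sentence with 0
indices = torch.LongTensor([
    [glove.stoi[w] for w in s] + [0] * (max_len - len(s))
    for s in sentences
])
lengths = torch.LongTensor([len(s) for s in sentences])

with torch.no_grad():
    vectors = cove(indices, lengths)

print(vectors.shape)  # expected: torch.Size([2, 4, 600])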

    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``.
        label : torch.LongTensor, optional (default = None)
            A variable representing the label for each instance in the batch.
        Returns
        -------
        An output dictionary consisting of:
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_classes)`` representing a
            distribution over the label classes for each instance.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        text_mask = util.get_text_field_mask(tokens).float()
        # Pop elmo tokens, since elmo embedder should not be present.
        elmo_tokens = tokens.pop("elmo", None)
        if tokens:
            embedded_text = self._text_field_embedder(tokens)
        else:
            # only using "elmo" for input
            embedded_text = None

        # Add the "elmo" key back to "tokens" if it was popped, since the tests and
        # subsequent training epochs rely on tokens not being modified during forward()
        if elmo_tokens is not None:
            tokens["elmo"] = elmo_tokens

        # Create ELMo embeddings if applicable
        if self._elmo:
            if elmo_tokens is not None:
                elmo_representations = self._elmo(elmo_tokens)["elmo_representations"]
                # Popping from the end of a list is more performant
                if self._use_integrator_output_elmo:
                    integrator_output_elmo = elmo_representations.pop()
                if self._use_input_elmo:
                    input_elmo = elmo_representations.pop()
                assert not elmo_representations
            else:
                raise ConfigurationError(
                        "Model was built to use Elmo, but input text is not tokenized for Elmo.")

        if self._use_input_elmo:
            if embedded_text is not None:
                embedded_text = torch.cat([embedded_text, input_elmo], dim=-1)
            else:
                embedded_text = input_elmo

        # When using embeddings from the MT-CNN encoder, the hard-coded vocabulary sizes below should be initialised appropriately
        if cnn:
            embedded_text_cnn = embedded_text
            enc = Encoder(7855, 300, 600, 5, 3, 0.25, 'cuda')
            dec = Decoder(5893, 300, 600, 5, 3, 0.25, 1, 'cuda')

            cnn_model = Seq2Seq(enc, dec).cuda()
            cnn_model.load_state_dict(torch.load('../cnn_lstm_model.pt'))
            cnn_model.eval()
            v1, v2 = cnn_model.encoder(embedded_text[:, :, :256])
            v3 = torch.cat((v1, v2), 2)

            embedded_text = torch.cat((embedded_text_cnn, v3), 2)

        # When using embeddings from the MT-LSTM encoder (loaded either from the model released with the paper or from the reproduced one)
        elif lstm:
            outputs_both_layer_cove_with_glove = MTLSTM(n_vocab=None,
                                                        vectors=None,
                                                        layer0=True,
                                                        residual_embeddings=True)
            outputs_both_layer_cove_with_glove.cuda()
            embedded_text = outputs_both_layer_cove_with_glove(
                embedded_text, [embedded_text.shape[1]] * embedded_text.shape[0])

        dropped_embedded_text = self._embedding_dropout(embedded_text)
        pre_encoded_text = self._pre_encode_feedforward(dropped_embedded_text)

        encoded_tokens = self._encoder(pre_encoded_text, text_mask)
        # Compute biattention. This is a special case since the inputs are the same.
        attention_logits = encoded_tokens.bmm(encoded_tokens.permute(0, 2, 1).contiguous())
        attention_weights = util.masked_softmax(attention_logits, text_mask)
        encoded_text = util.weighted_sum(encoded_tokens, attention_weights)

        # Build the input to the integrator
        integrator_input = torch.cat([encoded_tokens,
                                      encoded_tokens - encoded_text,
                                      encoded_tokens * encoded_text], 2)
        integrated_encodings = self._integrator(integrator_input, text_mask)

        # Concatenate ELMo representations to integrated_encodings if specified
        if self._use_integrator_output_elmo:
            integrated_encodings = torch.cat([integrated_encodings,
                                              integrator_output_elmo], dim=-1)

        # Simple Pooling layers
        max_masked_integrated_encodings = util.replace_masked_values(
                integrated_encodings, text_mask.unsqueeze(2), -1e7)
        max_pool = torch.max(max_masked_integrated_encodings, 1)[0]
        min_masked_integrated_encodings = util.replace_masked_values(
                integrated_encodings, text_mask.unsqueeze(2), +1e7)
        min_pool = torch.min(min_masked_integrated_encodings, 1)[0]
        mean_pool = torch.sum(integrated_encodings, 1) / torch.sum(text_mask, 1, keepdim=True)

        # Self-attentive pooling layer
        # Run through linear projection. Shape: (batch_size, sequence length, 1)
        # Then remove the last dimension to get the proper attention shape (batch_size, sequence length).
        self_attentive_logits = self._self_attentive_pooling_projection(
                integrated_encodings).squeeze(2)
        self_weights = util.masked_softmax(self_attentive_logits, text_mask)
        self_attentive_pool = util.weighted_sum(integrated_encodings, self_weights)

        pooled_representations = torch.cat([max_pool, min_pool, mean_pool, self_attentive_pool], 1)
        pooled_representations_dropped = self._integrator_dropout(pooled_representations)

        logits = self._output_layer(pooled_representations_dropped)
        class_probabilities = F.softmax(logits, dim=-1)

        output_dict = {'logits': logits, 'class_probabilities': class_probabilities}
        if label is not None:
            loss = self.loss(logits, label)
            for metric in self.metrics.values():
                metric(logits, label)
            output_dict["loss"] = loss

        return output_dict
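
The biattention step above (attention of the encoded sequence over itself, followed by concatenating the original encodings, their difference with the attended context, and their elementwise product) can be sketched in plain PyTorch as follows. masked_softmax here is a hand-rolled stand-in for the AllenNLP utility, and the shapes are assumed to be (batch, seq_len, dim) for the encodings and (batch, seq_len) for the mask.

import torch
import torch.nn.functional as F

def masked_softmax(logits, mask):
    # Softmax over the last dimension, ignoring key positions where mask == 0
    logits = logits.masked_fill(mask.unsqueeze(1) == 0, float('-inf'))
    return F.softmax(logits, dim=-1)

def biattention(encoded_tokens, text_mask):
    # Attention of the sequence over itself: (batch, seq_len, seq_len)
    attention_logits = encoded_tokens.bmm(encoded_tokens.transpose(1, 2))
    attention_weights = masked_softmax(attention_logits, text_mask)
    # Attended context: each position summarises the whole (masked) sequence
    encoded_text = attention_weights.bmm(encoded_tokens)
    # Integrator input: original encodings, their difference, and their product
    return torch.cat([encoded_tokens,
                      encoded_tokens - encoded_text,
                      encoded_tokens * encoded_text], dim=2)

# Toy usage: batch of 2, sequence length 4, hidden size 8 -> (2, 4, 24)
x = torch.randn(2, 4, 8)
mask = torch.tensor([[1, 1, 1, 0], [1, 1, 1, 1]])
print(biattention(x, mask).shape)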