Example #1
    def __init__(self, config):
        super(Reader, self).__init__()
        self.config = config.model

        # word embedding
        self.embedding = nn.Embedding(self.config.vocab_size,
                                      self.config.embedding_dim)

        # attention-weighted question
        self.qemb_match = layers.SeqAttnMatch(self.config.embedding_dim)
        init.normal_(self.qemb_match.linear.weight, 1)
        init.constant_(self.qemb_match.linear.bias, 0.1)

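        # Passage input: word emb + manual features + attention-weighted question emb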
        self.passage_input_size = self.config.embedding_dim + self.config.num_features + self.config.embedding_dim
        self.question_input_size = self.config.embedding_dim
        self.passage_encoder = layers.StackedBiLSTM(
            input_size=self.passage_input_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.passage_layers,
            dropout_rate=self.config.dropout_rate)

        self.question_encoder = layers.StackedBiLSTM(
            input_size=self.question_input_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.question_layers,
            dropout_rate=self.config.dropout_rate)

        # question merging
        self.self_attn = layers.LinearSeqAttn(self.config.hidden_size)
        init.normal_(self.self_attn.linear.weight, 1)
        init.constant_(self.self_attn.linear.bias, 0.1)

        # span start/end
        self.start_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                                 self.config.hidden_size)
        init.normal_(self.start_attn.linear.weight, 1)
        init.constant_(self.start_attn.linear.bias, 0.1)

        self.end_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                               self.config.hidden_size)
        init.normal_(self.end_attn.linear.weight, 1)
        init.constant_(self.end_attn.linear.bias, 0.1)
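
A minimal construction sketch, assuming a hypothetical config namespace that carries the fields read above (vocab_size, embedding_dim, num_features, hidden_size, passage_layers, question_layers, dropout_rate) and that the `layers` module used by the class is importable:

    from types import SimpleNamespace

    # Illustrative hyperparameters only
    model_cfg = SimpleNamespace(vocab_size=50000, embedding_dim=300, num_features=4,
                                hidden_size=128, passage_layers=3, question_layers=3,
                                dropout_rate=0.3)
    reader = Reader(SimpleNamespace(model=model_cfg))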
Example #2
    def __init__(self,
                 word_vectors,
                 hidden_size,
                 drop_prob=0.,
                 attention_type="BiDAF",
                 train_embeddings=False):
        super(QA_Model, self).__init__()

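        # Available attention mechanisms, selected by name through `attention_type`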
        ATTENTION_TYPES = {
            'DotProduct': layers.DotProductAttention,
            'Bilinear': layers.BilinearSeqAttn,
            'BiDAF': layers.BiDAFAttention
        }
        self.attention_type = ATTENTION_TYPES[attention_type]
        self.embedding_size = word_vectors.shape[1]
        self.embedding = nn.Embedding(word_vectors.shape[0],
                                      self.embedding_size)
        self.embedding.weight.data.copy_(word_vectors)
        self.embedding.weight.requires_grad = train_embeddings

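        # LSTM encoder over the embedded input sequences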
        self.encoder = layers.LSTMEncoder(input_size=self.embedding_size,
                                          hidden_size=hidden_size,
                                          num_layers=1,
                                          drop_prob=drop_prob)

        attention_output_size = hidden_size

        if self.attention_type == ATTENTION_TYPES['DotProduct']:
            self.att = layers.DotProductAttention(2 * hidden_size)
        elif self.attention_type == ATTENTION_TYPES['Bilinear']:
            self.att = layers.BilinearSeqAttn(2 * hidden_size, 2 * hidden_size)
        elif self.attention_type == ATTENTION_TYPES['BiDAF']:
            self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                             drop_prob=drop_prob)
            attention_output_size *= 2  # BiDAFAttention output is larger

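        # Softmax output layer over the attended representations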
        self.out = layers.SoftmaxOutputLayer(hidden_size=attention_output_size,
                                             drop_prob=drop_prob)
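
A minimal construction sketch; the embedding matrix below is a random stand-in for pre-trained vectors, the hyperparameters are illustrative, and the attention/encoder/output classes are assumed to be provided by the `layers` module referenced above:

    import torch

    # Hypothetical embedding matrix: 10,000-word vocabulary, 300-dimensional vectors
    word_vectors = torch.randn(10000, 300)

    model = QA_Model(word_vectors,
                     hidden_size=100,
                     drop_prob=0.2,
                     attention_type='BiDAF',
                     train_embeddings=False)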
Example #3
    def __init__(self, args):
        super(TriAN, self).__init__()
        self.args = args
        self.embedding_dim = 300
        self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
        self.embedding.weight.data.fill_(0)
        self.embedding.weight.data[:2].normal_(0, 0.1)
        self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

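        # Attention-based matching: question-aware passage, question-aware choice,
        # and passage-aware choice representations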
        self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)

        # Input size to RNN: word emb + question emb + pos emb + ner emb + 5 manual features + 2 relation embs
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN answer encoder
        choice_input_size = 3 * self.embedding_dim
        self.choice_rnn = layers.StackedBRNN(
            input_size=choice_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        choice_hidden_size = 2 * args.hidden_size

        # Answer merging
        self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)

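        # Bilinear attention of the question representation over passage states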
        self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size, y_size=question_hidden_size)

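        # Linear projections used to score passage and question summaries against the choice vector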
        self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
        self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
Example #4
    def __init__(self, args, normalize=True):
        super(RnnDocReader, self).__init__()
        # Store config
        self.args = args

        # Word embeddings (index 0 reserved for padding)
        self.embedding = nn.Embedding(args.vocab_size,
                                      args.embedding_dim,
                                      padding_idx=0)

        # Projection for attention weighted question
        if args.use_qemb:
            self.qemb_match = layers.SeqAttnMatch(args.embedding_dim)

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = args.embedding_dim + args.num_features
        if args.use_qemb:
            doc_input_size += args.embedding_dim

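        # Note: RNN_TYPES is assumed to be defined at class level,
        # e.g. {'lstm': nn.LSTM, 'gru': nn.GRU}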
        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=args.concat_rnn_layers,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        # RNN question encoder
        self.question_rnn = layers.StackedBRNN(
            input_size=args.embedding_dim,
            hidden_size=args.hidden_size,
            num_layers=args.question_layers,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=args.concat_rnn_layers,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        if args.concat_rnn_layers:
            doc_hidden_size *= args.doc_layers
            question_hidden_size *= args.question_layers

        # Question merging
        if args.question_merge not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' % args.question_merge)
        if args.question_merge == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
            normalize=normalize,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
            normalize=normalize,
        )
Example #5
    def __init__(self, args):
        super(TriAN, self).__init__()
        self.args = args
        self.embedding_dim = 300
        self.embedding = nn.Embedding(len(vocab),
                                      self.embedding_dim,
                                      padding_idx=0)
        self.embedding.weight.data.fill_(0)
        self.embedding.weight.data[:2].normal_(0, 0.1)
        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)

        # Input size to RNN: word emb + question emb + pos emb + ner emb + 5 manual features + relation emb
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + args.rel_emb_dim

        # Max passage size
        p_max_size = args.p_max_size
        self.p_max_size = p_max_size

        # Max question size
        q_max_size = args.q_max_size
        self.q_max_size = q_max_size

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        self.doc_hidden_size = doc_hidden_size
        question_hidden_size = 2 * args.hidden_size
        self.question_hidden_size = question_hidden_size
        # print('p_mask : ' , doc_input_size)

        # Attention over passage and question
        self.q_self_attn_start = layers.LinearSeqAttn(question_hidden_size,
                                                      q_max_size)
        self.p_q_attn_start = layers.BilinearSeqAttn(p_max_size, q_max_size,
                                                     p_max_size)

        self.q_self_attn_end = layers.LinearSeqAttn(question_hidden_size,
                                                    q_max_size)
        self.p_q_attn_end = layers.BilinearSeqAttn(p_max_size, q_max_size,
                                                   p_max_size)

        # Bilinear and linear layers with a sigmoid to produce start/end probabilities
        self.p_q_bilinear_start = nn.Bilinear(question_hidden_size,
                                              question_hidden_size, 1)
        self.p_q_bilinear_end = nn.Bilinear(question_hidden_size,
                                            question_hidden_size, 1)
        self.p_linear_start = nn.Linear(question_hidden_size, 1)
        self.p_linear_end = nn.Linear(question_hidden_size, 1)
        # Attention between start and end predictions
        self.start_end_attn = layers.BilinearProbaAttn(p_max_size)
        self.end_start_attn = layers.BilinearProbaAttn(p_max_size)

        # Feed forward
        self.feedforward_start = layers.NeuralNet(p_max_size, p_max_size,
                                                  p_max_size)
        self.feedforward_end = layers.NeuralNet(p_max_size, p_max_size,
                                                p_max_size)
Example #6
    def __init__(self, args):
        super(TriAN, self).__init__()
        self.args = args

        if self.args.use_elmo:
            self.embedding_dim = self.args.elmo_num_layer * 1024
        else:
            self.embedding_dim = 300
            self.embedding = nn.Embedding(
                len(vocab), self.embedding_dim,
                padding_idx=0)  # len is same as vocab size
            self.embedding.weight.data.fill_(0)
            self.embedding.weight.data[:2].normal_(0, 0.1)  # initialize

        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.p_q_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # question-aware passage representation
        self.c_q_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # question-aware choice representation
        self.c_p_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # passage-aware choice representation

        # Input size to RNN: word emb + question emb + pos emb + ner emb + 3 manual features + 2 relation embs
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 3 + 2 * args.rel_emb_dim

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN answer encoder
        choice_input_size = 3 * self.embedding_dim
        self.choice_rnn = layers.StackedBRNN(
            input_size=choice_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        choice_hidden_size = 2 * args.hidden_size

        # Answer merging
        self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(question_hidden_size +
                                                1)  # add essential term flag

        self.c_diff_attn = layers.DiffSeqAttn(choice_hidden_size)

        self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size,
                                               y_size=question_hidden_size)

        #self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
        #self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
        self.p_c_bilinear = nn.Linear(2 * doc_hidden_size,
                                      3 * choice_hidden_size)
        self.q_c_bilinear = nn.Linear(2 * question_hidden_size,
                                      3 * choice_hidden_size)
Example #7
    def __init__(self, opt, padding_idx=0, embedding=None):
        super(DRQA, self).__init__()
        # Store config
        self.opt = opt

        # Word embeddings
        if opt['pretrained_words']:
            assert embedding is not None
            self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)
            if opt['fix_embeddings']:
                assert opt['tune_partial'] == 0
                self.embedding.weight.requires_grad = False
            elif opt['tune_partial'] > 0:
                assert opt['tune_partial'] + 2 < embedding.size(0)
                offset = self.opt['tune_partial'] + 2

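                # Zero the gradient for rows beyond `tune_partial` + 2 so that only
                # the most frequent word embeddings are fine-tuned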
                def embedding_hook(grad, offset=offset):
                    grad[offset:] = 0
                    return grad

                self.embedding.weight.register_hook(embedding_hook)

        else:  # random initialized
            self.embedding = nn.Embedding(opt['vocab_size'],
                                          opt['embedding_dim'],
                                          padding_idx=padding_idx)
        # Projection for attention weighted question
        if opt['use_qemb']:
            self.qemb_match = layers.SeqAttnMatch(opt['embedding_dim'])

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = opt['embedding_dim'] + opt['num_features']
        if opt['use_qemb']:
            doc_input_size += opt['embedding_dim']
        if opt['pos']:
            doc_input_size += opt['pos_size']
        if opt['ner']:
            doc_input_size += opt['ner_size']

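        # Note: RNN_TYPES is assumed to be defined at class level,
        # e.g. {'lstm': nn.LSTM, 'gru': nn.GRU}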
        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=opt['hidden_size'],
            num_layers=opt['doc_layers'],
            dropout_rate=opt['dropout_rnn'],
            dropout_output=opt['dropout_rnn_output'],
            concat_layers=opt['concat_rnn_layers'],
            rnn_type=self.RNN_TYPES[opt['rnn_type']],
            padding=opt['rnn_padding'],
        )

        # RNN question encoder
        self.question_rnn = layers.StackedBRNN(
            input_size=opt['embedding_dim'],
            hidden_size=opt['hidden_size'],
            num_layers=opt['question_layers'],
            dropout_rate=opt['dropout_rnn'],
            dropout_output=opt['dropout_rnn_output'],
            concat_layers=opt['concat_rnn_layers'],
            rnn_type=self.RNN_TYPES[opt['rnn_type']],
            padding=opt['rnn_padding'],
        )

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * opt['hidden_size']
        question_hidden_size = 2 * opt['hidden_size']
        if opt['concat_rnn_layers']:
            doc_hidden_size *= opt['doc_layers']
            question_hidden_size *= opt['question_layers']

        # Question merging
        if opt['question_merge'] not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' % opt['question_merge'])
        if opt['question_merge'] == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )