Example #1
    def __init__(self, args):
        super(TriAN, self).__init__()
        self.args = args

        if self.args.use_elmo:
            self.embedding_dim = self.args.elmo_num_layer * 1024 
        else:
            self.embedding_dim = 300
            self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
            self.embedding.weight.data.fill_(0)
            self.embedding.weight.data[:2].normal_(0, 0.1)

        self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim) # question-aware choice representation
        self.q_c_emb_match = layers.SeqAttnMatch(self.embedding_dim) # choice-aware question representation
        
        # RNN question encoder: 2 * word emb + pos emb + ner emb + manual features + rel emb
        qst_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 4 + args.rel_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN answer encoder
        choice_input_size = 2 * self.embedding_dim
        self.choice_rnn = layers.StackedBRNN(
            input_size=choice_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders
        question_hidden_size = 2 * args.hidden_size
        choice_hidden_size = 2 * args.hidden_size

        # Answer merging
        self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)
     
        self.project = nn.Linear(2 * question_hidden_size + choice_hidden_size, 1)
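
Most of the examples in this file lean on layers.SeqAttnMatch for the attention-weighted representations. The sketch below is assumed from DrQA-style codebases (the class name and signature match the calls above, but it is not this repository's own implementation): each position of x attends over y and the layer returns the attention-weighted average of y.

import torch.nn as nn
import torch.nn.functional as F


class SeqAttnMatch(nn.Module):
    """Given sequences x and y, return, for each x_i, an attention-weighted
    average of y: o_i = sum_j alpha_ij * y_j, alpha_ij = softmax_j(proj(x_i) . proj(y_j))."""

    def __init__(self, input_size, identity=False):
        super(SeqAttnMatch, self).__init__()
        self.linear = None if identity else nn.Linear(input_size, input_size)

    def forward(self, x, y, y_mask):
        # x: batch x len1 x dim, y: batch x len2 x dim, y_mask: batch x len2 (1 = pad)
        if self.linear is not None:
            x_proj = F.relu(self.linear(x))
            y_proj = F.relu(self.linear(y))
        else:
            x_proj, y_proj = x, y
        scores = x_proj.bmm(y_proj.transpose(2, 1))                     # batch x len1 x len2
        scores = scores.masked_fill(y_mask.unsqueeze(1).bool(), -float('inf'))
        alpha = F.softmax(scores, dim=-1)                               # attention over y
        return alpha.bmm(y)                                             # batch x len1 x dim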
Example #2
    def __init__(self, embedding_dim, hidden_dim, label_size, batch_size,
                 use_gpu, dropout_emb):
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.use_gpu = use_gpu
        self.embedding_dim = embedding_dim
        self.dropout_emb = dropout_emb

        self.word_embeddings = nn.Embedding(len(vocab), embedding_dim)
        self.load_embeddings(vocab.tokens(), glove_data_file)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.hidden2label = nn.Linear(hidden_dim, label_size)
        self.hidden = self.init_hidden()

        self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)
Example #3
    def __init__(self, args):
        super(simpleModel, self).__init__()

        self.args = args

        if self.args.use_elmo:
            self.embedding_dim = self.args.elmo_num_layer * 1024
        else:
            self.embedding_dim = 300
            self.embedding = nn.Embedding(len(vocab),
                                          self.embedding_dim,
                                          padding_idx=0)
            self.embedding.weight.data.fill_(0)
            self.embedding.weight.data[:2].normal_(0, 0.1)

        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.q_c_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # choice-aware question representation

        # RNN question encoder: 2 * word emb + rel emb
        qst_input_size = 2 * self.embedding_dim + args.rel_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Projection input: bidirectional question RNN output (2 * hidden) + pos emb + ner emb + 3 manual features + rel emb
        proj_input_size = 2 * args.hidden_size + args.pos_emb_dim + args.ner_emb_dim + 3 + args.rel_emb_dim

        self.project = nn.Linear(proj_input_size, 1)
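
For a quick sanity check of the two derived sizes above, here is the same arithmetic with hypothetical hyperparameter values (illustrative only, not the repository's actual defaults):

# Illustrative values only (assumed, not the actual defaults).
embedding_dim = 300
hidden_size, pos_emb_dim, ner_emb_dim, rel_emb_dim = 96, 12, 8, 10

qst_input_size = 2 * embedding_dim + rel_emb_dim                                  # 610
proj_input_size = 2 * hidden_size + pos_emb_dim + ner_emb_dim + 3 + rel_emb_dim   # 225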
Example #4
    def __init__(self, config):
        super(Reader, self).__init__()
        self.config = config.model

        #word embedding
        self.embedding = nn.Embedding(self.config.vocab_size,
                                      self.config.embedding_dim)

        #attention weighted question
        self.qemb_match = layers.SeqAttnMatch(self.config.embedding_dim)
        init.normal_(self.qemb_match.linear.weight, 1)
        init.constant_(self.qemb_match.linear.bias, 0.1)

        self.passage_input_size = self.config.embedding_dim + self.config.num_features + self.config.embedding_dim
        self.question_input_size = self.config.embedding_dim
        self.passage_encoder = layers.StackedBiLSTM(
            input_size=self.passage_input_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.passage_layers,
            dropout_rate=self.config.dropout_rate)

        self.question_encoder = layers.StackedBiLSTM(
            input_size=self.question_input_size,
            hidden_size=self.config.hidden_size,
            num_layers=self.config.question_layers,
            dropout_rate=self.config.dropout_rate)

        #question merging
        self.self_attn = layers.LinearSeqAttn(self.config.hidden_size)
        init.normal_(self.self_attn.linear.weight, 1)
        init.constant_(self.self_attn.linear.bias, 0.1)

        #span start/end
        self.start_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                                 self.config.hidden_size)
        init.normal_(self.start_attn.linear.weight, 1)
        init.constant_(self.start_attn.linear.bias, 0.1)

        self.end_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                               self.config.hidden_size)
        init.normal_(self.end_attn.linear.weight, 1)
        init.constant_(self.end_attn.linear.bias, 0.1)
Example #5
    def __init__(self, args):
        super(TriAN, self).__init__()
        self.args = args
        self.embedding_dim = 300
        self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
        self.embedding.weight.data.fill_(0)
        self.embedding.weight.data[:2].normal_(0, 0.1)
        self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
        self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)

        # Input size to doc RNN: word emb + question-attended emb + pos emb + ner emb + 5 manual features + 2 relation embs
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN answer encoder
        choice_input_size = 3 * self.embedding_dim
        self.choice_rnn = layers.StackedBRNN(
            input_size=choice_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn_output,
            dropout_output=True,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        choice_hidden_size = 2 * args.hidden_size

        # Answer merging
        self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)

        self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size, y_size=question_hidden_size)

        self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
        self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
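
The answer-merging and passage-question attention layers used above (LinearSeqAttn, BilinearSeqAttn) are also DrQA-style components. A minimal sketch under that assumption, with simplified signatures, returning only the normalized attention weights:

import torch.nn as nn
import torch.nn.functional as F


class LinearSeqAttn(nn.Module):
    """Self attention over a sequence: alpha_i = softmax_i(w . x_i)."""

    def __init__(self, input_size):
        super(LinearSeqAttn, self).__init__()
        self.linear = nn.Linear(input_size, 1)

    def forward(self, x, x_mask):
        # x: batch x len x dim, x_mask: batch x len (1 = pad)
        scores = self.linear(x).squeeze(2)
        scores = scores.masked_fill(x_mask.bool(), -float('inf'))
        return F.softmax(scores, dim=-1)


class BilinearSeqAttn(nn.Module):
    """Bilinear attention of sequence x w.r.t. vector y: alpha_i = softmax_i(x_i . W y)."""

    def __init__(self, x_size, y_size):
        super(BilinearSeqAttn, self).__init__()
        self.linear = nn.Linear(y_size, x_size)

    def forward(self, x, y, x_mask):
        # x: batch x len x x_size, y: batch x y_size, x_mask: batch x len (1 = pad)
        Wy = self.linear(y)                                  # batch x x_size
        scores = x.bmm(Wy.unsqueeze(2)).squeeze(2)           # batch x len
        scores = scores.masked_fill(x_mask.bool(), -float('inf'))
        return F.softmax(scores, dim=-1)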
Example #6
    def __init__(self, opt, padding_idx=0, embedding=None):
        super(ArticleReader, self).__init__()
        # Store config
        self.opt = opt

        # Word embeddings
        if opt['pretrained_words']:
            assert embedding is not None
            self.embedding = nn.Embedding.from_pretrained(embedding,
                                                          freeze=False)
            if opt['fix_embeddings']:
                assert opt['tune_partial'] == 0
                self.embedding.weight.requires_grad = False

        else:  # random initialized
            self.embedding = nn.Embedding(opt['vocab_size'],
                                          opt['embedding_dim'],
                                          padding_idx=padding_idx)
        # Projection for attention weighted query
        if opt['use_t2_emb']:
            self.t2emb_match = layers.SeqAttnMatch(opt['embedding_dim'])

        # Input size to RNN: word emb + query emb + manual features
        article_input_size = opt['embedding_dim'] + opt['num_features']
        if opt['use_t2_emb']:
            article_input_size += opt['embedding_dim']
        if opt['pos']:
            article_input_size += opt['pos_size']
        if opt['ner']:
            article_input_size += opt['ner_size']

        # self.self_attn = layers.SelfAttention(opt, opt['embedding_dim'])
        # doc_input_size += opt['embedding_dim']

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=opt['dropout_linear'])

        # Stacked LSTM article encoder
        self.article = layers.StackedEncoder(
            input_size=article_input_size,
            hidden_size=opt['hidden_size'],
            num_layers=opt['t1_layers'],
            dropout_rate=opt['dropout_rnn'],
            dropout_output=opt['dropout_rnn_output'],
            concat_layers=opt['concat_rnn_layers'],
            rnn_type=self.RNN_TYPES[opt['rnn_type']])

        # Stacked LSTM query encoder
        self.query = layers.StackedEncoder(
            input_size=opt['embedding_dim'],
            hidden_size=opt['hidden_size'],
            num_layers=opt['t2_layers'],
            dropout_rate=opt['dropout_rnn'],
            dropout_output=opt['dropout_rnn_output'],
            concat_layers=opt['concat_rnn_layers'],
            rnn_type=self.RNN_TYPES[opt['rnn_type']])

        # Output sizes of Stacked LSTM Encoders
        article_hidden_size = 2 * opt['hidden_size']  # article_hidden_size = 256
        query_hidden_size = 2 * opt['hidden_size']  # query_hidden_size = 256
        if opt['concat_rnn_layers']:
            article_hidden_size *= opt['t1_layers']  # article_hidden_size = 768
            query_hidden_size *= opt['t2_layers']  # query_hidden_size = 768

        self.single_encoder = layers.SingleEncoder(opt, article_hidden_size)

        # self.decoder = layers.Decoder(2 * article_hidden_size, article_hidden_size, opt['embedding_dim'],
        #                               self.opt['vocab_size'], n_layers=2)

        # encoders_dim = 2 * article_hidden_size  # seq_in_size = 1536
        # self.attention = layers.Attention2(encoders_dim)

        seq_in_size = 2 * article_hidden_size  # seq_in_size = 1536, as encoder is bi-directional
        merge_size = 2 * seq_in_size  # merging both encoders: merge_size = 3072
        lin_config = [merge_size] * 2
        self.out = nn.Sequential(Linear(*lin_config), self.relu, self.dropout,
                                 Linear(*lin_config), self.relu, self.dropout,
                                 Linear(*lin_config), self.relu, self.dropout,
                                 Linear(merge_size, opt['d_out']))
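
The inline size comments above imply hidden_size=128 with three layers per encoder; spelled out, the arithmetic behind 256 / 768 / 1536 / 3072 is:

# Values implied by the inline comments above (hidden_size=128, t1_layers=3,
# concat_rnn_layers=True); illustrative only.
hidden_size, t1_layers = 128, 3

article_hidden_size = 2 * hidden_size   # 256, bidirectional encoder
article_hidden_size *= t1_layers        # 768, all layers concatenated
seq_in_size = 2 * article_hidden_size   # 1536
merge_size = 2 * seq_in_size            # 3072, both encoders merged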
Example #7
    def __init__(self, args, normalize=True):
        super(MnemonicReader, self).__init__()
        # Store config
        self.args = args

        # Word embeddings (+1 for padding)
        self.embedding = nn.Embedding(args.vocab_size,
                                      args.embedding_dim,
                                      padding_idx=0)

        # Char embeddings (+1 for padding)
        self.char_embedding = nn.Embedding(args.char_size,
                                           args.char_embedding_dim,
                                           padding_idx=0)

        # Char rnn to generate char features
        self.char_rnn = layers.StackedBRNN(
            input_size=args.char_embedding_dim,
            hidden_size=args.char_hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        doc_input_size = args.embedding_dim + args.char_hidden_size * 2 + args.num_features

        # Encoder
        self.encoding_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        doc_hidden_size = 2 * args.hidden_size

        # Interactive aligning, self aligning and aggregating
        self.interactive_aligners = nn.ModuleList()
        self.interactive_SFUs = nn.ModuleList()
        self.self_aligners = nn.ModuleList()
        self.self_SFUs = nn.ModuleList()
        self.aggregate_rnns = nn.ModuleList()
        for i in range(args.hop):
            # interactive aligner
            self.interactive_aligners.append(
                layers.SeqAttnMatch(doc_hidden_size, identity=True))
            self.interactive_SFUs.append(
                layers.SFU(doc_hidden_size, 3 * doc_hidden_size))
            # self aligner
            self.self_aligners.append(
                layers.SelfAttnMatch(doc_hidden_size,
                                     identity=True,
                                     diag=False))
            self.self_SFUs.append(
                layers.SFU(doc_hidden_size, 3 * doc_hidden_size))
            # aggregating
            self.aggregate_rnns.append(
                layers.StackedBRNN(
                    input_size=doc_hidden_size,
                    hidden_size=args.hidden_size,
                    num_layers=1,
                    dropout_rate=args.dropout_rnn,
                    dropout_output=args.dropout_rnn_output,
                    concat_layers=False,
                    rnn_type=self.RNN_TYPES[args.rnn_type],
                    padding=args.rnn_padding,
                ))

        # Memory-based Answer Pointer
        self.mem_ans_ptr = layers.MemoryAnsPointer(
            x_size=2 * args.hidden_size,
            y_size=2 * args.hidden_size,
            hidden_size=args.hidden_size,
            hop=args.hop,
            dropout_rate=args.dropout_rnn,
            normalize=normalize)
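
layers.SFU is the Mnemonic-Reader semantic fusion unit; its implementation is not shown in this file. A common formulation, assumed to match the (input_size, fusion_size) constructor used above, gates a candidate fused state back into the input:

import torch
import torch.nn as nn


class SFU(nn.Module):
    """Semantic Fusion Unit (sketch): fuse x with auxiliary features."""

    def __init__(self, input_size, fusion_size):
        super(SFU, self).__init__()
        self.linear_r = nn.Linear(input_size + fusion_size, input_size)
        self.linear_g = nn.Linear(input_size + fusion_size, input_size)

    def forward(self, x, fusions):
        # x: batch x len x input_size, fusions: batch x len x fusion_size
        rf = torch.cat([x, fusions], dim=2)
        r = torch.tanh(self.linear_r(rf))       # candidate fused state
        g = torch.sigmoid(self.linear_g(rf))    # fusion gate
        return g * r + (1 - g) * x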
Example #8
    def __init__(self, args, normalize=True):
        super(R_Net, self).__init__()
        # Store config
        self.args = args

        # Word embeddings (+1 for padding)
        self.embedding = nn.Embedding(args.vocab_size,
                                      args.embedding_dim,
                                      padding_idx=0)

        # Char embeddings (+1 for padding)
        self.char_embedding = nn.Embedding(args.char_size,
                                           args.char_embedding_dim,
                                           padding_idx=0)

        # Char rnn to generate char features
        self.char_rnn = layers.StackedBRNN(
            input_size=args.char_embedding_dim,
            hidden_size=args.char_hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=False,
        )

        doc_input_size = args.embedding_dim + args.char_hidden_size * 2

        # Encoder
        self.encode_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=args.concat_rnn_layers,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        # Output sizes of rnn encoder
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        if args.concat_rnn_layers:
            doc_hidden_size *= args.doc_layers
            question_hidden_size *= args.question_layers
        
        # Gated-attention-based RNN of the whole question
        self.question_attn = layers.SeqAttnMatch(question_hidden_size, identity=False)
        self.question_attn_gate = layers.Gate(doc_hidden_size + question_hidden_size)
        self.question_attn_rnn = layers.StackedBRNN(
            input_size=doc_hidden_size + question_hidden_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        question_attn_hidden_size = 2 * args.hidden_size

        # Self-matching-attention-based RNN of the whole doc
        self.doc_self_attn = layers.SelfAttnMatch(question_attn_hidden_size, identity=False)
        self.doc_self_attn_gate = layers.Gate(question_attn_hidden_size + question_attn_hidden_size)
        self.doc_self_attn_rnn = layers.StackedBRNN(
            input_size=question_attn_hidden_size + question_attn_hidden_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        doc_self_attn_hidden_size = 2 * args.hidden_size

        self.doc_self_attn_rnn2 = layers.StackedBRNN(
            input_size=doc_self_attn_hidden_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        self.ptr_net = layers.PointerNetwork(
            x_size=doc_self_attn_hidden_size,
            y_size=question_hidden_size,
            hidden_size=args.hidden_size,
            dropout_rate=args.dropout_rnn,
            cell_type=nn.GRUCell,
            normalize=normalize,
        )
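
layers.Gate plays the role of R-Net's input gate on the concatenated [state; attended context] vectors. A minimal sketch, assuming the usual element-wise sigmoid gate:

import torch
import torch.nn as nn


class Gate(nn.Module):
    """R-Net-style input gate (sketch): x -> sigmoid(W x) * x."""

    def __init__(self, input_size):
        super(Gate, self).__init__()
        self.linear = nn.Linear(input_size, input_size, bias=False)

    def forward(self, x):
        # x: batch x len x input_size
        return torch.sigmoid(self.linear(x)) * x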
Example #9
    def __init__(self, args, normalize=True):
        super(RnnDocReader, self).__init__()
        # Store config
        self.args = args

        # Word embeddings (+1 for padding)
        self.embedding = nn.Embedding(args.vocab_size,
                                      args.embedding_dim,
                                      padding_idx=0)

        # Projection for attention weighted question
        if args.use_qemb:
            self.qemb_match = layers.SeqAttnMatch(args.embedding_dim)

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = args.embedding_dim + args.num_features
        if args.use_qemb:
            doc_input_size += args.embedding_dim

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=args.concat_rnn_layers,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        # RNN question encoder
        self.question_rnn = layers.StackedBRNN(
            input_size=args.embedding_dim,
            hidden_size=args.hidden_size,
            num_layers=args.question_layers,
            dropout_rate=args.dropout_rnn,
            dropout_output=args.dropout_rnn_output,
            concat_layers=args.concat_rnn_layers,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding,
        )

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        if args.concat_rnn_layers:
            doc_hidden_size *= args.doc_layers
            question_hidden_size *= args.question_layers

        # Question merging
        if args.question_merge not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' % args.question_merge)
        if args.question_merge == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
            normalize=normalize,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
            normalize=normalize,
        )
Example #10
    def __init__(self,
                 opt,
                 char_embedding,
                 padding_idx=0,
                 normalize_emb=False):
        super(RnnDocReader, self).__init__()
        # Store config
        self.opt = opt

        char_embedding = torch.FloatTensor(char_embedding)
        self.char_embedding = nn.Embedding(char_embedding.size(0),
                                           char_embedding.size(1),
                                           padding_idx=padding_idx)
        self.char_embedding.weight.data = char_embedding

        # Projection for attention weighted question
        if opt.use_qemb:
            self.qemb_match = layers.SeqAttnMatch(opt.char_emb_dim * 2)
            self.qemb_match_ds = layers.SeqAttnMatch(opt.char_emb_dim * 2)

        # Input size to doc RNN: char-BRNN emb (2 * char_emb_dim) + question-attended emb + manual features
        doc_input_size = opt.char_emb_dim * 2 + opt.num_features
        if opt.use_qemb:
            doc_input_size += opt.char_emb_dim * 2
        if opt.use_interaction:
            self.qhiden_match = layers.SeqAttnMatch(opt.hidden_size * 2)
            self.qhiden_match_ds = layers.SeqAttnMatch(opt.hidden_size * 2)

        self.char_rnn = layers.StackedBRNN(
            input_size=opt.char_emb_dim,
            hidden_size=opt.char_emb_dim,
            num_layers=1,
            dropout_rate=opt.dropout_rnn,
            dropout_output=opt.dropout_rnn_output,
            concat_layers=opt.concat_rnn_layers,
            use_tanh=True,
            bidirectional=True)
        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=opt.hidden_size,
            num_layers=opt.doc_layers,
            dropout_rate=opt.dropout_rnn,
            dropout_output=opt.dropout_rnn_output,
            concat_layers=opt.concat_rnn_layers,
            res_net=opt.res_net)
        question_input_size = opt.char_emb_dim * 2
        # RNN question encoder
        self.question_rnn = layers.StackedBRNN(
            input_size=question_input_size,
            hidden_size=opt.hidden_size,
            num_layers=opt.question_layers,
            dropout_rate=opt.dropout_rnn,
            dropout_output=opt.dropout_rnn_output,
            concat_layers=opt.concat_rnn_layers,
            res_net=opt.res_net)

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * opt.hidden_size
        question_hidden_size = 2 * opt.hidden_size
        if opt.concat_rnn_layers:
            doc_hidden_size *= opt.doc_layers
            question_hidden_size *= opt.question_layers

        match_in_dim = opt.hidden_size * 2 * 2
        # Bilinear attention for span start/end
        self.s_linear = nn.Linear(match_in_dim, 1)
        self.e_linear = nn.Linear(match_in_dim + 1, 1)
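
The pretrained character vectors above are loaded by constructing nn.Embedding and overwriting .weight.data. A roughly equivalent, self-contained illustration with the built-in factory (the random tensor below is a stand-in for the real pretrained matrix):

import torch
import torch.nn as nn

pretrained = torch.randn(100, 16)   # stand-in: 100 characters x 16 dims

# Pattern used above: construct, then overwrite the weights.
emb_manual = nn.Embedding(pretrained.size(0), pretrained.size(1), padding_idx=0)
emb_manual.weight.data = pretrained

# Built-in factory; freeze=False keeps the weights trainable, as above.
emb_factory = nn.Embedding.from_pretrained(pretrained, freeze=False, padding_idx=0)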
Example #11
    def __init__(self, args):
        super(TriAN, self).__init__()
        self.args = args
        self.embedding_dim = 300
        self.embedding = nn.Embedding(len(vocab),
                                      self.embedding_dim,
                                      padding_idx=0)
        self.embedding.weight.data.fill_(0)
        self.embedding.weight.data[:2].normal_(0, 0.1)
        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)

        # Input size to doc RNN: word emb + question-attended emb + pos emb + ner emb + 5 manual features + rel emb
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + args.rel_emb_dim

        # Max passage size
        p_max_size = args.p_max_size
        self.p_max_size = p_max_size

        # Max question size
        q_max_size = args.q_max_size
        self.q_max_size = q_max_size

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        self.doc_hidden_size = doc_hidden_size
        question_hidden_size = 2 * args.hidden_size
        self.question_hidden_size = question_hidden_size
        # print('p_mask : ' , doc_input_size)

        # Attention over passage and question
        self.q_self_attn_start = layers.LinearSeqAttn(question_hidden_size,
                                                      q_max_size)
        self.p_q_attn_start = layers.BilinearSeqAttn(p_max_size, q_max_size,
                                                     p_max_size)

        self.q_self_attn_end = layers.LinearSeqAttn(question_hidden_size,
                                                    q_max_size)
        self.p_q_attn_end = layers.BilinearSeqAttn(p_max_size, q_max_size,
                                                   p_max_size)

        # Bilinear layer and sigmoid to proba
        self.p_q_bilinear_start = nn.Bilinear(question_hidden_size,
                                              question_hidden_size, 1)
        self.p_q_bilinear_end = nn.Bilinear(question_hidden_size,
                                            question_hidden_size, 1)
        self.p_linear_start = nn.Linear(question_hidden_size, 1)
        self.p_linear_end = nn.Linear(question_hidden_size, 1)
        # Attention start end
        self.start_end_attn = layers.BilinearProbaAttn(p_max_size)
        self.end_start_attn = layers.BilinearProbaAttn(p_max_size)

        # Feed forward
        self.feedforward_start = layers.NeuralNet(p_max_size, p_max_size,
                                                  p_max_size)
        self.feedforward_end = layers.NeuralNet(p_max_size, p_max_size,
                                                p_max_size)
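
nn.Bilinear(question_hidden_size, question_hidden_size, 1) above is the stock PyTorch bilinear layer, scoring a pair of vectors as x1^T A x2 + b. A quick shape check with arbitrary illustrative sizes:

import torch
import torch.nn as nn

hidden = 2 * 96                            # arbitrary illustrative question_hidden_size
bilinear = nn.Bilinear(hidden, hidden, 1)

start_states = torch.randn(4, hidden)      # e.g. pooled passage states, batch of 4
question_states = torch.randn(4, hidden)   # e.g. pooled question states
print(bilinear(start_states, question_states).shape)   # torch.Size([4, 1])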
Example #12
    def __init__(self, args):
        super(TriAN, self).__init__()
        self.args = args

        if self.args.use_elmo:
            self.embedding_dim = self.args.elmo_num_layer * 1024
        else:
            self.embedding_dim = 300
            self.embedding = nn.Embedding(
                len(vocab), self.embedding_dim,
                padding_idx=0)  # len is same as vocab size
            self.embedding.weight.data.fill_(0)
            self.embedding.weight.data[:2].normal_(0, 0.1)  # initialize

        self.pos_embedding = nn.Embedding(len(pos_vocab),
                                          args.pos_emb_dim,
                                          padding_idx=0)
        self.pos_embedding.weight.data.normal_(0, 0.1)
        self.ner_embedding = nn.Embedding(len(ner_vocab),
                                          args.ner_emb_dim,
                                          padding_idx=0)
        self.ner_embedding.weight.data.normal_(0, 0.1)
        self.rel_embedding = nn.Embedding(len(rel_vocab),
                                          args.rel_emb_dim,
                                          padding_idx=0)
        self.rel_embedding.weight.data.normal_(0, 0.1)
        self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}

        self.p_q_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # question-aware passage representation
        self.c_q_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # question-aware choice representation
        self.c_p_emb_match = layers.SeqAttnMatch(
            self.embedding_dim)  # passage-aware choice representation

        # Input size to doc RNN: word emb + question-attended emb + pos emb + ner emb + 3 manual features + 2 relation embs
        doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 3 + 2 * args.rel_emb_dim

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=args.hidden_size,
            num_layers=args.doc_layers,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN question encoder: word emb + pos emb
        qst_input_size = self.embedding_dim + args.pos_emb_dim
        self.question_rnn = layers.StackedBRNN(
            input_size=qst_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # RNN answer encoder
        choice_input_size = 3 * self.embedding_dim
        self.choice_rnn = layers.StackedBRNN(
            input_size=choice_input_size,
            hidden_size=args.hidden_size,
            num_layers=1,
            dropout_rate=0,
            dropout_output=args.dropout_rnn_output,
            concat_layers=False,
            rnn_type=self.RNN_TYPES[args.rnn_type],
            padding=args.rnn_padding)

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * args.hidden_size
        question_hidden_size = 2 * args.hidden_size
        choice_hidden_size = 2 * args.hidden_size

        # Answer merging
        self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
        self.q_self_attn = layers.LinearSeqAttn(question_hidden_size +
                                                1)  # add essential term flag

        self.c_diff_attn = layers.DiffSeqAttn(choice_hidden_size)

        self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size,
                                               y_size=question_hidden_size)

        #self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
        #self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
        self.p_c_bilinear = nn.Linear(2 * doc_hidden_size,
                                      3 * choice_hidden_size)
        self.q_c_bilinear = nn.Linear(2 * question_hidden_size,
                                      3 * choice_hidden_size)
Example #13
    def __init__(self, opt, padding_idx=0, embedding=None):
        super(DRQA, self).__init__()
        # Store config
        self.opt = opt

        # Word embeddings
        if opt['pretrained_words']:
            assert embedding is not None
            self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)
            if opt['fix_embeddings']:
                assert opt['tune_partial'] == 0
                self.embedding.weight.requires_grad = False
            elif opt['tune_partial'] > 0:
                assert opt['tune_partial'] + 2 < embedding.size(0)
                offset = self.opt['tune_partial'] + 2

                def embedding_hook(grad, offset=offset):
                    grad[offset:] = 0
                    return grad

                self.embedding.weight.register_hook(embedding_hook)

        else:  # random initialized
            self.embedding = nn.Embedding(opt['vocab_size'],
                                          opt['embedding_dim'],
                                          padding_idx=padding_idx)
        # Projection for attention weighted question
        if opt['use_qemb']:
            self.qemb_match = layers.SeqAttnMatch(opt['embedding_dim'])

        # Input size to RNN: word emb + question emb + manual features
        doc_input_size = opt['embedding_dim'] + opt['num_features']
        if opt['use_qemb']:
            doc_input_size += opt['embedding_dim']
        if opt['pos']:
            doc_input_size += opt['pos_size']
        if opt['ner']:
            doc_input_size += opt['ner_size']

        # RNN document encoder
        self.doc_rnn = layers.StackedBRNN(
            input_size=doc_input_size,
            hidden_size=opt['hidden_size'],
            num_layers=opt['doc_layers'],
            dropout_rate=opt['dropout_rnn'],
            dropout_output=opt['dropout_rnn_output'],
            concat_layers=opt['concat_rnn_layers'],
            rnn_type=self.RNN_TYPES[opt['rnn_type']],
            padding=opt['rnn_padding'],
        )

        # RNN question encoder
        self.question_rnn = layers.StackedBRNN(
            input_size=opt['embedding_dim'],
            hidden_size=opt['hidden_size'],
            num_layers=opt['question_layers'],
            dropout_rate=opt['dropout_rnn'],
            dropout_output=opt['dropout_rnn_output'],
            concat_layers=opt['concat_rnn_layers'],
            rnn_type=self.RNN_TYPES[opt['rnn_type']],
            padding=opt['rnn_padding'],
        )

        # Output sizes of rnn encoders
        doc_hidden_size = 2 * opt['hidden_size']
        question_hidden_size = 2 * opt['hidden_size']
        if opt['concat_rnn_layers']:
            doc_hidden_size *= opt['doc_layers']
            question_hidden_size *= opt['question_layers']

        # Question merging
        if opt['question_merge'] not in ['avg', 'self_attn']:
            raise NotImplementedError('question_merge = %s' % opt['question_merge'])
        if opt['question_merge'] == 'self_attn':
            self.self_attn = layers.LinearSeqAttn(question_hidden_size)

        # Bilinear attention for span start/end
        self.start_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
        self.end_attn = layers.BilinearSeqAttn(
            doc_hidden_size,
            question_hidden_size,
        )
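
Finally, every example feeds its inputs through layers.StackedBRNN. The sketch below follows the DrQA-style implementation that the constructor calls above assume; the padding / packed-sequence branch is omitted, so treat it as an approximation rather than the repository's actual layer.

import torch
import torch.nn as nn
import torch.nn.functional as F


class StackedBRNN(nn.Module):
    """Stacked bidirectional RNN (sketch; the `padding` packed-sequence path is omitted)."""

    def __init__(self, input_size, hidden_size, num_layers,
                 dropout_rate=0, dropout_output=False,
                 rnn_type=nn.LSTM, concat_layers=False, padding=False):
        super(StackedBRNN, self).__init__()
        self.dropout_rate = dropout_rate
        self.dropout_output = dropout_output
        self.concat_layers = concat_layers
        self.rnns = nn.ModuleList()
        for i in range(num_layers):
            in_size = input_size if i == 0 else 2 * hidden_size
            self.rnns.append(rnn_type(in_size, hidden_size,
                                      num_layers=1, bidirectional=True))

    def forward(self, x, x_mask=None):
        # x: batch x len x input_size; the internal RNNs run time-major.
        outputs = [x.transpose(0, 1)]
        for rnn in self.rnns:
            rnn_input = outputs[-1]
            if self.dropout_rate > 0:
                rnn_input = F.dropout(rnn_input, p=self.dropout_rate,
                                      training=self.training)
            outputs.append(rnn(rnn_input)[0])
        output = torch.cat(outputs[1:], dim=2) if self.concat_layers else outputs[-1]
        output = output.transpose(0, 1)
        if self.dropout_output and self.dropout_rate > 0:
            output = F.dropout(output, p=self.dropout_rate, training=self.training)
        return output.contiguous()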