def __init__(self, args):
    """Build the TriAN question/choice model: embeddings, word-level
    attention matches, BRNN encoders and the final scoring projection.

    NOTE(review): relies on module-level `vocab`, `pos_vocab`, `ner_vocab`,
    `rel_vocab` and the project `layers` module — confirm they are in scope
    where this class is defined.
    """
    super(TriAN, self).__init__()
    self.args = args
    # ELMo supplies `elmo_num_layer` stacked 1024-d layers; the fallback is
    # a single 300-d word vector.
    self.embedding_dim = self.args.elmo_num_layer * 1024 if self.args.use_elmo else 300
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    # Only the first two rows get random init; the rest presumably come
    # from pretrained vectors loaded elsewhere — confirm.
    self.embedding.weight.data[:2].normal_(0, 0.1)
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    # question-aware choice / choice-aware question representations
    self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    self.q_c_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    rnn_cls = self.RNN_TYPES[args.rnn_type]
    # question encoder input: 2 * word emb + pos emb + ner emb
    # + 4 manual features + rel emb
    q_in_dim = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 4 + args.rel_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=q_in_dim,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=rnn_cls,
        padding=args.rnn_padding)
    # answer encoder input: 2 * word emb
    self.choice_rnn = layers.StackedBRNN(
        input_size=2 * self.embedding_dim,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=rnn_cls,
        padding=args.rnn_padding)
    # both encoders are bidirectional, so they emit 2 * hidden_size
    enc_dim = 2 * args.hidden_size
    # answer / question merging
    self.c_self_attn = layers.LinearSeqAttn(enc_dim)
    self.q_self_attn = layers.LinearSeqAttn(enc_dim)
    # score from [question; question; choice] summary vectors
    self.project = nn.Linear(2 * enc_dim + enc_dim, 1)
def __init__(self, embedding_dim, hidden_dim, label_size, batch_size, use_gpu, dropout_emb):
    """LSTM classifier over GloVe-initialised embeddings, with three
    sequence-attention matching layers.

    NOTE(review): relies on module-level `vocab`, `glove_data_file` and the
    project `layers` module — confirm they are in scope.
    """
    super(LSTMClassifier, self).__init__()
    # plain configuration values
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.batch_size = batch_size
    self.use_gpu = use_gpu
    self.dropout_emb = dropout_emb
    # word embeddings, then overwritten from the GloVe file
    self.word_embeddings = nn.Embedding(len(vocab), embedding_dim)
    self.load_embeddings(vocab.tokens(), glove_data_file)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim)
    self.hidden2label = nn.Linear(hidden_dim, label_size)
    self.hidden = self.init_hidden()
    # question-aware passage / question-aware choice / passage-aware choice
    for attn_name in ('p_q_emb_match', 'c_q_emb_match', 'c_p_emb_match'):
        setattr(self, attn_name, layers.SeqAttnMatch(self.embedding_dim))
def __init__(self, args):
    """Minimal baseline: choice-aware question match, one question BRNN,
    and a linear scoring head over encoder output plus tag features.

    NOTE(review): relies on module-level `vocab`, `pos_vocab`, `ner_vocab`,
    `rel_vocab` and the project `layers` module — confirm they are in scope.
    """
    super(simpleModel, self).__init__()
    self.args = args
    # ELMo: `elmo_num_layer` stacked 1024-d layers; otherwise 300-d vectors.
    self.embedding_dim = self.args.elmo_num_layer * 1024 if self.args.use_elmo else 300
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    # only the first two rows are randomised; the rest presumably come from
    # pretrained vectors loaded elsewhere — confirm
    self.embedding.weight.data[:2].normal_(0, 0.1)
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    # choice-aware question representation
    self.q_c_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    # question encoder input: 2 * word emb + rel emb
    q_in_dim = 2 * self.embedding_dim + args.rel_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=q_in_dim,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # scoring head input: bidirectional rnn output + pos emb + ner emb
    # + 3 manual features + rel emb
    proj_in_dim = 2 * args.hidden_size + args.pos_emb_dim + args.ner_emb_dim + 3 + args.rel_emb_dim
    self.project = nn.Linear(proj_in_dim, 1)
def __init__(self, config):
    """DrQA-style span reader: word embedding, attention-weighted question,
    stacked BiLSTM encoders and bilinear start/end span attentions.

    Args:
        config: object whose ``model`` attribute carries vocab/embedding
            sizes, layer counts and dropout settings.
    """
    super(Reader, self).__init__()
    self.config = config.model
    # word embedding
    self.embedding = nn.Embedding(self.config.vocab_size,
                                  self.config.embedding_dim)
    # attention weighted question
    # NOTE(review): normal_(w, 1) sets mean=1 (std stays 1.0) — presumably
    # intended, but verify against the training recipe.
    self.qemb_match = layers.SeqAttnMatch(self.config.embedding_dim)
    init.normal_(self.qemb_match.linear.weight, 1)
    init.constant_(self.qemb_match.linear.bias, 0.1)
    # passage input: word emb + manual features + attended question emb
    self.passage_input_size = (self.config.embedding_dim +
                               self.config.num_features +
                               self.config.embedding_dim)
    self.question_input_size = self.config.embedding_dim
    self.passage_encoder = layers.StackedBiLSTM(
        input_size=self.passage_input_size,
        hidden_size=self.config.hidden_size,
        num_layers=self.config.passage_layers,
        dropout_rate=self.config.dropout_rate)
    self.question_encoder = layers.StackedBiLSTM(
        input_size=self.question_input_size,
        hidden_size=self.config.hidden_size,
        num_layers=self.config.question_layers,
        dropout_rate=self.config.dropout_rate)
    # question merging
    self.self_attn = layers.LinearSeqAttn(self.config.hidden_size)
    init.normal_(self.self_attn.linear.weight, 1)
    init.constant_(self.self_attn.linear.bias, 0.1)
    # span start/end
    # BUG FIX: the original re-initialised self.qemb_match.linear here
    # (copy-paste), leaving start_attn/end_attn at their default init and
    # stomping qemb_match's weights twice more; initialise the new modules
    # instead. Also switched the removed deprecated alias init.constant to
    # the in-place init.constant_.
    # NOTE(review): assumes BilinearSeqAttn exposes its projection as
    # `.linear` like SeqAttnMatch/LinearSeqAttn do — confirm in layers.py.
    self.start_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                             self.config.hidden_size)
    init.normal_(self.start_attn.linear.weight, 1)
    init.constant_(self.start_attn.linear.bias, 0.1)
    self.end_attn = layers.BilinearSeqAttn(self.config.hidden_size,
                                           self.config.hidden_size)
    init.normal_(self.end_attn.linear.weight, 1)
    init.constant_(self.end_attn.linear.bias, 0.1)
def __init__(self, args):
    """TriAN reader: passage/question/choice embeddings, word-level
    attention matches, three BRNN encoders and bilinear merge layers.

    NOTE(review): relies on module-level `vocab`, `pos_vocab`, `ner_vocab`,
    `rel_vocab` and the project `layers` module — confirm they are in scope
    where this class is defined.
    """
    super(TriAN, self).__init__()
    self.args = args
    self.embedding_dim = 300  # fixed 300-d word vectors
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    # Only the first two rows are randomised; the rest are presumably
    # overwritten by pretrained vectors loaded elsewhere — confirm.
    self.embedding.weight.data[:2].normal_(0, 0.1)
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    # question-aware passage / question-aware choice / passage-aware choice
    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    self.c_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    self.c_p_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    # Input size to RNN: word emb + question emb + pos emb + ner emb + manual features
    doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + 2 * args.rel_emb_dim
    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=args.dropout_rnn_output,
        dropout_output=True,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # RNN question encoder: word emb + pos emb
    qst_input_size = self.embedding_dim + args.pos_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=qst_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn_output,
        dropout_output=True,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # RNN answer encoder: word emb + question-attended emb + passage-attended emb
    choice_input_size = 3 * self.embedding_dim
    self.choice_rnn = layers.StackedBRNN(
        input_size=choice_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn_output,
        dropout_output=True,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # Output sizes of rnn encoders (bidirectional -> 2 * hidden_size)
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    choice_hidden_size = 2 * args.hidden_size
    # Answer merging
    self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
    self.q_self_attn = layers.LinearSeqAttn(question_hidden_size)
    self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size, y_size=question_hidden_size)
    self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
    self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
def __init__(self, opt, padding_idx=0, embedding=None):
    """Article/query encoder: stacked encoders over (optionally pretrained)
    embeddings plus an MLP output head.

    Args:
        opt: dict of model options.
        padding_idx: embedding row reserved for padding (random-init path).
        embedding: pretrained embedding tensor, required when
            opt['pretrained_words'] is truthy.

    NOTE(review): reads `self.RNN_TYPES` and bare `Linear` below without
    defining them here — presumably a class attribute / `from torch.nn
    import Linear` at file level; confirm.
    """
    super(ArticleReader, self).__init__()
    # Store config
    self.opt = opt
    # Word embeddings
    if opt['pretrained_words']:
        assert embedding is not None
        self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)
        if opt['fix_embeddings']:
            # fixed embeddings are incompatible with partial tuning
            assert opt['tune_partial'] == 0
            self.embedding.weight.requires_grad = False
    else:  # random initialized
        self.embedding = nn.Embedding(opt['vocab_size'],
                                      opt['embedding_dim'],
                                      padding_idx=padding_idx)
    # Projection for attention weighted query
    if opt['use_t2_emb']:
        self.t2emb_match = layers.SeqAttnMatch(opt['embedding_dim'])
    # Input size to RNN: word emb + query emb + manual features
    article_input_size = opt['embedding_dim'] + opt['num_features']
    if opt['use_t2_emb']:
        article_input_size += opt['embedding_dim']
    if opt['pos']:
        article_input_size += opt['pos_size']
    if opt['ner']:
        article_input_size += opt['ner_size']
    # self.self_attn = layers.SelfAttention(opt, opt['embedding_dim'])
    # doc_input_size += opt['embedding_dim']
    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(p=opt['dropout_linear'])
    # Stacked LSTM article encoder
    self.article = layers.StackedEncoder(
        input_size=article_input_size,
        hidden_size=opt['hidden_size'],
        num_layers=opt['t1_layers'],
        dropout_rate=opt['dropout_rnn'],
        dropout_output=opt['dropout_rnn_output'],
        concat_layers=opt['concat_rnn_layers'],
        rnn_type=self.RNN_TYPES[opt['rnn_type']])
    # Stacked LSTM query encoder
    self.query = layers.StackedEncoder(
        input_size=opt['embedding_dim'],
        hidden_size=opt['hidden_size'],
        num_layers=opt['t2_layers'],
        dropout_rate=opt['dropout_rnn'],
        dropout_output=opt['dropout_rnn_output'],
        concat_layers=opt['concat_rnn_layers'],
        rnn_type=self.RNN_TYPES[opt['rnn_type']])
    # Output sizes of Stacked LSTM Encoders (bidirectional -> 2 * hidden)
    article_hidden_size = 2 * opt['hidden_size']  # e.g. 256 for hidden_size=128
    query_hidden_size = 2 * opt['hidden_size']  # e.g. 256
    if opt['concat_rnn_layers']:
        article_hidden_size *= opt['t1_layers']  # e.g. 768 for 3 layers
        query_hidden_size *= opt['t2_layers']  # e.g. 768
    self.single_encoder = layers.SingleEncoder(opt, article_hidden_size)
    # self.decoder = layers.Decoder(2 * article_hidden_size, article_hidden_size,
    #                               opt['embedding_dim'], self.opt['vocab_size'], n_layers=2)
    # encoders_dim = 2 * article_hidden_size
    # self.attention = layers.Attention2(encoders_dim)
    seq_in_size = 2 * article_hidden_size  # encoder is bi-directional
    merge_size = 2 * seq_in_size  # merging both encoders
    lin_config = [merge_size] * 2  # square hidden layers of the MLP head
    # three ReLU+dropout hidden layers, then project to d_out classes
    self.out = nn.Sequential(Linear(*lin_config), self.relu, self.dropout,
                             Linear(*lin_config), self.relu, self.dropout,
                             Linear(*lin_config), self.relu, self.dropout,
                             Linear(merge_size, opt['d_out']))
def __init__(self, args, normalize=True):
    """Mnemonic Reader: char/word encoding, multi-hop interactive and self
    alignment with SFU fusion, and a memory-based answer pointer.

    Args:
        args: configuration namespace (vocab/char sizes, RNN settings, hop count).
        normalize: forwarded to the answer pointer (controls whether its
            scores are normalised).

    NOTE(review): reads `self.RNN_TYPES` but does not define it here —
    presumably a class attribute on this or a base class; confirm.
    """
    super(MnemonicReader, self).__init__()
    # Store config
    self.args = args
    # Word embeddings (+1 for padding)
    self.embedding = nn.Embedding(args.vocab_size,
                                  args.embedding_dim,
                                  padding_idx=0)
    # Char embeddings (+1 for padding)
    self.char_embedding = nn.Embedding(args.char_size,
                                       args.char_embedding_dim,
                                       padding_idx=0)
    # Char rnn to generate char features
    self.char_rnn = layers.StackedBRNN(
        input_size=args.char_embedding_dim,
        hidden_size=args.char_hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )
    # word emb + bidirectional char features + manual features
    doc_input_size = args.embedding_dim + args.char_hidden_size * 2 + args.num_features
    # Encoder
    self.encoding_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )
    doc_hidden_size = 2 * args.hidden_size  # bidirectional encoder output
    # Interactive aligning, self aligning and aggregating — one set per hop
    self.interactive_aligners = nn.ModuleList()
    self.interactive_SFUs = nn.ModuleList()
    self.self_aligners = nn.ModuleList()
    self.self_SFUs = nn.ModuleList()
    self.aggregate_rnns = nn.ModuleList()
    for i in range(args.hop):
        # interactive aligner
        self.interactive_aligners.append(
            layers.SeqAttnMatch(doc_hidden_size, identity=True))
        self.interactive_SFUs.append(
            layers.SFU(doc_hidden_size, 3 * doc_hidden_size))
        # self aligner (diag=False: a position may not attend to itself)
        self.self_aligners.append(
            layers.SelfAttnMatch(doc_hidden_size, identity=True, diag=False))
        self.self_SFUs.append(
            layers.SFU(doc_hidden_size, 3 * doc_hidden_size))
        # aggregating
        self.aggregate_rnns.append(
            layers.StackedBRNN(
                input_size=doc_hidden_size,
                hidden_size=args.hidden_size,
                num_layers=1,
                dropout_rate=args.dropout_rnn,
                dropout_output=args.dropout_rnn_output,
                concat_layers=False,
                rnn_type=self.RNN_TYPES[args.rnn_type],
                padding=args.rnn_padding,
            ))
    # Memory-based Answer Pointer
    self.mem_ans_ptr = layers.MemoryAnsPointer(
        x_size=2 * args.hidden_size,
        y_size=2 * args.hidden_size,
        hidden_size=args.hidden_size,
        hop=args.hop,
        dropout_rate=args.dropout_rnn,
        normalize=normalize)
def __init__(self, args, normalize=True):
    """R-Net reader: char/word encoding, gated question attention, doc
    self-matching attention, and a pointer network for span prediction.

    Args:
        args: configuration namespace (vocab/char sizes, RNN settings).
        normalize: forwarded to the pointer network.

    NOTE(review): reads `self.RNN_TYPES` but does not define it here —
    presumably a class attribute on this or a base class; confirm.
    """
    super(R_Net, self).__init__()
    # Store config
    self.args = args
    # Word embeddings (+1 for padding)
    self.embedding = nn.Embedding(args.vocab_size,
                                  args.embedding_dim,
                                  padding_idx=0)
    # Char embeddings (+1 for padding)
    self.char_embedding = nn.Embedding(args.char_size,
                                       args.char_embedding_dim,
                                       padding_idx=0)
    # Char rnn to generate char features
    self.char_rnn = layers.StackedBRNN(
        input_size=args.char_embedding_dim,
        hidden_size=args.char_hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=False,
    )
    # word emb + bidirectional char features
    doc_input_size = args.embedding_dim + args.char_hidden_size * 2
    # Encoder (shared settings for doc and question inputs)
    self.encode_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=args.concat_rnn_layers,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )
    # Output sizes of rnn encoder (bidirectional -> 2 * hidden)
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    if args.concat_rnn_layers:
        doc_hidden_size *= args.doc_layers
        question_hidden_size *= args.question_layers
    # Gated-attention-based RNN of the whole question
    self.question_attn = layers.SeqAttnMatch(question_hidden_size, identity=False)
    self.question_attn_gate = layers.Gate(doc_hidden_size + question_hidden_size)
    self.question_attn_rnn = layers.StackedBRNN(
        input_size=doc_hidden_size + question_hidden_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )
    question_attn_hidden_size = 2 * args.hidden_size
    # Self-matching-attention-based RNN of the whole doc
    self.doc_self_attn = layers.SelfAttnMatch(question_attn_hidden_size, identity=False)
    self.doc_self_attn_gate = layers.Gate(question_attn_hidden_size + question_attn_hidden_size)
    self.doc_self_attn_rnn = layers.StackedBRNN(
        input_size=question_attn_hidden_size + question_attn_hidden_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )
    doc_self_attn_hidden_size = 2 * args.hidden_size
    # second aggregation pass over the self-matched doc
    self.doc_self_attn_rnn2 = layers.StackedBRNN(
        input_size=doc_self_attn_hidden_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )
    # pointer network predicts the answer span
    self.ptr_net = layers.PointerNetwork(
        x_size=doc_self_attn_hidden_size,
        y_size=question_hidden_size,
        hidden_size=args.hidden_size,
        dropout_rate=args.dropout_rnn,
        cell_type=nn.GRUCell,
        normalize=normalize
    )
def __init__(self, args, normalize=True):
    """DrQA-style document reader: doc/question BRNN encoders plus bilinear
    start/end span attentions.

    Args:
        args: configuration namespace (vocab/embedding sizes, RNN settings,
            `question_merge` in {'avg', 'self_attn'}).
        normalize: forwarded to the span attentions (controls score
            normalisation).

    Raises:
        NotImplementedError: if ``args.question_merge`` is not a supported mode.

    NOTE(review): reads `self.RNN_TYPES` but does not define it here —
    presumably a class attribute on this or a base class; confirm.
    """
    super(RnnDocReader, self).__init__()
    # Store config
    self.args = args
    # Word embeddings (+1 for padding)
    self.embedding = nn.Embedding(args.vocab_size,
                                  args.embedding_dim,
                                  padding_idx=0)
    # Projection for attention weighted question
    if args.use_qemb:
        self.qemb_match = layers.SeqAttnMatch(args.embedding_dim)
    # Input size to RNN: word emb + question emb + manual features
    doc_input_size = args.embedding_dim + args.num_features
    if args.use_qemb:
        doc_input_size += args.embedding_dim
    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=args.concat_rnn_layers,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )
    # RNN question encoder
    self.question_rnn = layers.StackedBRNN(
        input_size=args.embedding_dim,
        hidden_size=args.hidden_size,
        num_layers=args.question_layers,
        dropout_rate=args.dropout_rnn,
        dropout_output=args.dropout_rnn_output,
        concat_layers=args.concat_rnn_layers,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding,
    )
    # Output sizes of rnn encoders (bidirectional -> 2 * hidden)
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    if args.concat_rnn_layers:
        doc_hidden_size *= args.doc_layers
        question_hidden_size *= args.question_layers
    # Question merging
    if args.question_merge not in ['avg', 'self_attn']:
        # BUG FIX: the original formatted `args.merge_mode`, an attribute
        # that does not exist, so this path raised AttributeError instead
        # of the intended NotImplementedError.
        raise NotImplementedError('question_merge = %s' % args.question_merge)
    if args.question_merge == 'self_attn':
        self.self_attn = layers.LinearSeqAttn(question_hidden_size)
    # Bilinear attention for span start/end
    self.start_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
        normalize=normalize,
    )
    self.end_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
        normalize=normalize,
    )
def __init__(self, opt, char_embedding, padding_idx=0, normalize_emb=False):
    """Char-embedding-based document reader with linear start/end heads.

    Args:
        opt: configuration object (char_emb_dim, hidden_size, layer counts,
            dropout and feature flags).
        char_embedding: pretrained char embedding matrix (list/array),
            copied into the embedding layer.
        padding_idx: embedding row reserved for padding.
        normalize_emb: accepted but not used in this constructor.
    """
    super(RnnDocReader, self).__init__()
    # Store config
    self.opt = opt
    char_embedding = torch.FloatTensor(char_embedding)
    self.char_embedding = nn.Embedding(char_embedding.size(0),
                                       char_embedding.size(1),
                                       padding_idx=padding_idx)
    # copy pretrained weights in place
    self.char_embedding.weight.data = char_embedding
    # Projection for attention weighted question
    if opt.use_qemb:
        self.qemb_match = layers.SeqAttnMatch(opt.char_emb_dim * 2)
        self.qemb_match_ds = layers.SeqAttnMatch(opt.char_emb_dim * 2)
    # Input size to RNN: word emb + question emb + manual features
    doc_input_size = opt.char_emb_dim * 2 + opt.num_features
    if opt.use_qemb:
        doc_input_size += opt.char_emb_dim * 2
    if opt.use_interaction:
        # hidden-level question matching (and a second "ds" copy)
        self.qhiden_match = layers.SeqAttnMatch(opt.hidden_size * 2)
        self.qhiden_match_ds = layers.SeqAttnMatch(opt.hidden_size * 2)
    # char-level encoder producing 2 * char_emb_dim features
    self.char_rnn = layers.StackedBRNN(
        input_size=opt.char_emb_dim,
        hidden_size=opt.char_emb_dim,
        num_layers=1,
        dropout_rate=opt.dropout_rnn,
        dropout_output=opt.dropout_rnn_output,
        concat_layers=opt.concat_rnn_layers,
        use_tanh=True,
        # bidirectional=True
        )
    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=opt.hidden_size,
        num_layers=opt.doc_layers,
        dropout_rate=opt.dropout_rnn,
        dropout_output=opt.dropout_rnn_output,
        concat_layers=opt.concat_rnn_layers,
        res_net=opt.res_net)
    question_input_size = opt.char_emb_dim * 2
    # RNN question encoder
    self.question_rnn = layers.StackedBRNN(
        input_size=question_input_size,
        hidden_size=opt.hidden_size,
        num_layers=opt.question_layers,
        dropout_rate=opt.dropout_rnn,
        dropout_output=opt.dropout_rnn_output,
        concat_layers=opt.concat_rnn_layers,
        res_net=opt.res_net)
    # Output sizes of rnn encoders (bidirectional -> 2 * hidden)
    doc_hidden_size = 2 * opt.hidden_size
    question_hidden_size = 2 * opt.hidden_size
    if opt.concat_rnn_layers:
        doc_hidden_size *= opt.doc_layers
        question_hidden_size *= opt.question_layers
    match_in_dim = opt.hidden_size * 2 * 2
    # Linear heads for span start/end; the end head takes one extra input
    # — presumably the start score is appended; confirm in forward()
    self.s_linear = nn.Linear(match_in_dim, 1)
    self.e_linear = nn.Linear(match_in_dim + 1, 1)
def __init__(self, args):
    """TriAN-style span model: doc/question encoders plus start/end
    probability heads with proba-attention and feed-forward refinement.

    NOTE(review): relies on module-level `vocab`, `pos_vocab`, `ner_vocab`,
    `rel_vocab` and the project `layers` module — confirm they are in scope.
    """
    super(TriAN, self).__init__()
    self.args = args
    self.embedding_dim = 300  # fixed 300-d word vectors
    self.embedding = nn.Embedding(len(vocab), self.embedding_dim, padding_idx=0)
    self.embedding.weight.data.fill_(0)
    # only the first two rows are randomised; the rest are presumably
    # overwritten by pretrained vectors loaded elsewhere — confirm
    self.embedding.weight.data[:2].normal_(0, 0.1)
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    # question-aware passage representation
    self.p_q_emb_match = layers.SeqAttnMatch(self.embedding_dim)
    # Input size to RNN: word emb + question emb + pos emb + ner emb + manual features
    doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 5 + args.rel_emb_dim
    # Max passage size
    p_max_size = args.p_max_size
    self.p_max_size = p_max_size
    # Max question size
    q_max_size = args.q_max_size
    self.q_max_size = q_max_size
    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # RNN question encoder: word emb + pos emb
    qst_input_size = self.embedding_dim + args.pos_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=qst_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # Output sizes of rnn encoders (bidirectional -> 2 * hidden)
    doc_hidden_size = 2 * args.hidden_size
    self.doc_hidden_size = doc_hidden_size
    question_hidden_size = 2 * args.hidden_size
    self.question_hidden_size = question_hidden_size
    # print('p_mask : ' , doc_input_size)
    # Attention over passage and question, separate sets for start and end
    self.q_self_attn_start = layers.LinearSeqAttn(question_hidden_size, q_max_size)
    self.p_q_attn_start = layers.BilinearSeqAttn(p_max_size, q_max_size, p_max_size)
    self.q_self_attn_end = layers.LinearSeqAttn(question_hidden_size, q_max_size)
    self.p_q_attn_end = layers.BilinearSeqAttn(p_max_size, q_max_size, p_max_size)
    # Bilinear layer and sigmoid to proba
    self.p_q_bilinear_start = nn.Bilinear(question_hidden_size, question_hidden_size, 1)
    self.p_q_bilinear_end = nn.Bilinear(question_hidden_size, question_hidden_size, 1)
    self.p_linear_start = nn.Linear(question_hidden_size, 1)
    self.p_linear_end = nn.Linear(question_hidden_size, 1)
    # Attention between start and end probability vectors
    self.start_end_attn = layers.BilinearProbaAttn(p_max_size)
    self.end_start_attn = layers.BilinearProbaAttn(p_max_size)
    # Feed forward refinement of each span boundary
    self.feedforward_start = layers.NeuralNet(p_max_size, p_max_size, p_max_size)
    self.feedforward_end = layers.NeuralNet(p_max_size, p_max_size, p_max_size)
def __init__(self, args):
    """TriAN variant with optional ELMo embeddings, choice diff-attention
    and widened bilinear output projections.

    NOTE(review): relies on module-level `vocab`, `pos_vocab`, `ner_vocab`,
    `rel_vocab` and the project `layers` module — confirm they are in scope.
    """
    super(TriAN, self).__init__()
    self.args = args
    if self.args.use_elmo:
        # ELMo supplies elmo_num_layer stacked 1024-d layers
        self.embedding_dim = self.args.elmo_num_layer * 1024
    else:
        self.embedding_dim = 300
    self.embedding = nn.Embedding(
        len(vocab), self.embedding_dim, padding_idx=0)  # len(vocab) is the vocab size
    self.embedding.weight.data.fill_(0)
    self.embedding.weight.data[:2].normal_(0, 0.1)  # initialize only the special rows
    self.pos_embedding = nn.Embedding(len(pos_vocab), args.pos_emb_dim, padding_idx=0)
    self.pos_embedding.weight.data.normal_(0, 0.1)
    self.ner_embedding = nn.Embedding(len(ner_vocab), args.ner_emb_dim, padding_idx=0)
    self.ner_embedding.weight.data.normal_(0, 0.1)
    self.rel_embedding = nn.Embedding(len(rel_vocab), args.rel_emb_dim, padding_idx=0)
    self.rel_embedding.weight.data.normal_(0, 0.1)
    self.RNN_TYPES = {'lstm': nn.LSTM, 'gru': nn.GRU}
    self.p_q_emb_match = layers.SeqAttnMatch(
        self.embedding_dim)  # question-aware passage representation
    self.c_q_emb_match = layers.SeqAttnMatch(
        self.embedding_dim)  # question-aware choice representation
    self.c_p_emb_match = layers.SeqAttnMatch(
        self.embedding_dim)  # passage-aware choice representation
    # Input size to RNN: word emb + question emb + pos emb + ner emb + manual features
    doc_input_size = 2 * self.embedding_dim + args.pos_emb_dim + args.ner_emb_dim + 3 + 2 * args.rel_emb_dim
    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=args.hidden_size,
        num_layers=args.doc_layers,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # RNN question encoder: word emb + pos emb
    qst_input_size = self.embedding_dim + args.pos_emb_dim
    self.question_rnn = layers.StackedBRNN(
        input_size=qst_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # RNN answer encoder: word emb + question-attended + passage-attended
    choice_input_size = 3 * self.embedding_dim
    self.choice_rnn = layers.StackedBRNN(
        input_size=choice_input_size,
        hidden_size=args.hidden_size,
        num_layers=1,
        dropout_rate=0,
        dropout_output=args.dropout_rnn_output,
        concat_layers=False,
        rnn_type=self.RNN_TYPES[args.rnn_type],
        padding=args.rnn_padding)
    # Output sizes of rnn encoders (bidirectional -> 2 * hidden)
    doc_hidden_size = 2 * args.hidden_size
    question_hidden_size = 2 * args.hidden_size
    choice_hidden_size = 2 * args.hidden_size
    # Answer merging
    self.c_self_attn = layers.LinearSeqAttn(choice_hidden_size)
    self.q_self_attn = layers.LinearSeqAttn(question_hidden_size + 1)  # +1: essential term flag
    self.c_diff_attn = layers.DiffSeqAttn(choice_hidden_size)
    self.p_q_attn = layers.BilinearSeqAttn(x_size=doc_hidden_size, y_size=question_hidden_size)
    # previous (narrower) projections kept for reference:
    # self.p_c_bilinear = nn.Linear(doc_hidden_size, choice_hidden_size)
    # self.q_c_bilinear = nn.Linear(question_hidden_size, choice_hidden_size)
    self.p_c_bilinear = nn.Linear(2 * doc_hidden_size, 3 * choice_hidden_size)
    self.q_c_bilinear = nn.Linear(2 * question_hidden_size, 3 * choice_hidden_size)
def __init__(self, opt, padding_idx=0, embedding=None):
    """DrQA network: doc/question BRNN encoders plus bilinear span attentions.

    Args:
        opt: dict of model options.
        padding_idx: embedding row reserved for padding (random-init path).
        embedding: pretrained embedding tensor, required when
            opt['pretrained_words'] is truthy.

    Raises:
        NotImplementedError: if opt['question_merge'] is not a supported mode.

    NOTE(review): reads `self.RNN_TYPES` but does not define it here —
    presumably a class attribute on this or a base class; confirm.
    """
    super(DRQA, self).__init__()
    # Store config
    self.opt = opt
    # Word embeddings
    if opt['pretrained_words']:
        assert embedding is not None
        self.embedding = nn.Embedding.from_pretrained(embedding, freeze=False)
        if opt['fix_embeddings']:
            # fixed embeddings are incompatible with partial tuning
            assert opt['tune_partial'] == 0
            self.embedding.weight.requires_grad = False
        elif opt['tune_partial'] > 0:
            assert opt['tune_partial'] + 2 < embedding.size(0)
            offset = self.opt['tune_partial'] + 2

            # zero the gradient for every row past `offset`, so only the
            # first `tune_partial` words (plus two special rows) get tuned
            def embedding_hook(grad, offset=offset):
                grad[offset:] = 0
                return grad

            self.embedding.weight.register_hook(embedding_hook)
    else:  # random initialized
        self.embedding = nn.Embedding(opt['vocab_size'],
                                      opt['embedding_dim'],
                                      padding_idx=padding_idx)
    # Projection for attention weighted question
    if opt['use_qemb']:
        self.qemb_match = layers.SeqAttnMatch(opt['embedding_dim'])
    # Input size to RNN: word emb + question emb + manual features
    doc_input_size = opt['embedding_dim'] + opt['num_features']
    if opt['use_qemb']:
        doc_input_size += opt['embedding_dim']
    if opt['pos']:
        doc_input_size += opt['pos_size']
    if opt['ner']:
        doc_input_size += opt['ner_size']
    # RNN document encoder
    self.doc_rnn = layers.StackedBRNN(
        input_size=doc_input_size,
        hidden_size=opt['hidden_size'],
        num_layers=opt['doc_layers'],
        dropout_rate=opt['dropout_rnn'],
        dropout_output=opt['dropout_rnn_output'],
        concat_layers=opt['concat_rnn_layers'],
        rnn_type=self.RNN_TYPES[opt['rnn_type']],
        padding=opt['rnn_padding'],
    )
    # RNN question encoder
    self.question_rnn = layers.StackedBRNN(
        input_size=opt['embedding_dim'],
        hidden_size=opt['hidden_size'],
        num_layers=opt['question_layers'],
        dropout_rate=opt['dropout_rnn'],
        dropout_output=opt['dropout_rnn_output'],
        concat_layers=opt['concat_rnn_layers'],
        rnn_type=self.RNN_TYPES[opt['rnn_type']],
        padding=opt['rnn_padding'],
    )
    # Output sizes of rnn encoders (bidirectional -> 2 * hidden)
    doc_hidden_size = 2 * opt['hidden_size']
    question_hidden_size = 2 * opt['hidden_size']
    if opt['concat_rnn_layers']:
        doc_hidden_size *= opt['doc_layers']
        question_hidden_size *= opt['question_layers']
    # Question merging
    if opt['question_merge'] not in ['avg', 'self_attn']:
        raise NotImplementedError('question_merge = %s' % opt['question_merge'])
    if opt['question_merge'] == 'self_attn':
        self.self_attn = layers.LinearSeqAttn(question_hidden_size)
    # Bilinear attention for span start/end
    self.start_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
    )
    self.end_attn = layers.BilinearSeqAttn(
        doc_hidden_size,
        question_hidden_size,
    )