def __init__(self, opt):
    super(SDNetTrainer, self).__init__(opt)
    print('SDNet Model Trainer')
    set_dropout_prob(0.0 if 'DROPOUT' not in opt else float(opt['DROPOUT']))
    self.seed = int(opt['SEED'])
    self.data_prefix = 'coqa-'
    random.seed(self.seed)
    np.random.seed(self.seed)
    torch.manual_seed(self.seed)
    self.preproc = CoQAPreprocess(self.opt)
    if self.use_cuda:
        torch.cuda.manual_seed_all(self.seed)
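# The seeding boilerplate above recurs in every trainer in this listing. A
# minimal refactor sketch (hypothetical helper, not part of the original
# code) that the __init__ methods here could call instead:
import random

import numpy as np
import torch

def seed_everything(seed: int, use_cuda: bool = False) -> None:
    """Seed Python, NumPy and PyTorch RNGs so runs are reproducible."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_cuda:
        # covers all visible GPUs, matching torch.cuda.manual_seed_all above
        torch.cuda.manual_seed_all(seed)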
def __init__(self, opt):
    super(ConvQA_CN_NetTrainer, self).__init__(opt)
    print('Model Trainer')
    set_dropout_prob(0.0 if 'DROPOUT' not in opt else float(opt['DROPOUT']))
    self.seed = int(opt['SEED'])
    self.opt = opt
    random.seed(self.seed)
    np.random.seed(self.seed)
    torch.manual_seed(self.seed)
    if self.opt['dataset'] == 'quac':
        self.data_prefix = 'quac-'
        self.preproc = QuACPreprocess(self.opt)
    if self.use_cuda:
        torch.cuda.manual_seed_all(self.seed)
    ### seq2seq
    self.train_lang, self.dev_lang = dataprocess("train", "dev")
    self.opt['train_words'] = self.train_lang.n_words
    self.opt['dev_words'] = self.dev_lang.n_words
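# The seq2seq hook above only relies on dataprocess("train", "dev") returning
# two objects that expose an n_words vocabulary count. A hedged sketch of such
# an object (assumed shape; the real dataprocess lives elsewhere in the repo):
class LangSketch:
    def __init__(self, sentences):
        self.word2index = {}
        for sent in sentences:
            for word in sent.split():
                self.word2index.setdefault(word, len(self.word2index))
        self.n_words = len(self.word2index)  # what the trainer stores into opt

# e.g. LangSketch(["what is quac", "quac is a dataset"]).n_words == 5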
def __init__(self, opt, word_embedding):
    super(SDNet, self).__init__()
    print('SDNet model\n')
    self.opt = opt
    self.use_cuda = (self.opt['cuda'] == True)
    set_dropout_prob(0.0 if 'DROPOUT' not in opt else float(opt['DROPOUT']))
    set_seq_dropout('VARIATIONAL_DROPOUT' in self.opt)

    x_input_size = 0
    ques_input_size = 0
    self.vocab_size = int(opt['vocab_size'])
    vocab_dim = int(opt['vocab_dim'])
    self.vocab_embed = nn.Embedding(self.vocab_size, vocab_dim, padding_idx=1)
    self.vocab_embed.weight.data = word_embedding
    x_input_size += vocab_dim
    ques_input_size += vocab_dim

    if 'CHAR_CNN' in self.opt:
        print('CHAR_CNN')
        char_vocab_size = int(opt['char_vocab_size'])
        char_dim = int(opt['char_emb_size'])
        char_hidden_size = int(opt['char_hidden_size'])
        self.char_embed = nn.Embedding(char_vocab_size, char_dim, padding_idx=1)
        self.char_cnn = CNN(char_dim, 3, char_hidden_size)
        self.maxpooling = MaxPooling()
        x_input_size += char_hidden_size
        ques_input_size += char_hidden_size

    if 'TUNE_PARTIAL' in self.opt:
        print('TUNE_PARTIAL')
        self.fixed_embedding = word_embedding[opt['tune_partial']:]
    else:
        self.vocab_embed.weight.requires_grad = False

    cdim = 0
    self.use_contextual = False

    if 'BERT' in self.opt:
        print('Using BERT')
        self.Bert = Bert(self.opt)
        if 'LOCK_BERT' in self.opt:
            print('Lock BERT\'s weights')
            for p in self.Bert.parameters():
                p.requires_grad = False
        if 'BERT_LARGE' in self.opt:
            print('BERT_LARGE')
            bert_dim = 1024
            bert_layers = 24
        else:
            bert_dim = 768
            bert_layers = 12
        print('BERT dim:', bert_dim, 'BERT_LAYERS:', bert_layers)

        if 'BERT_LINEAR_COMBINE' in self.opt:
            print('BERT_LINEAR_COMBINE')
            # per-layer weights and a global scale for combining BERT layer outputs
            self.alphaBERT = nn.Parameter(torch.Tensor(bert_layers), requires_grad=True)
            self.gammaBERT = nn.Parameter(torch.Tensor(1, 1), requires_grad=True)
            torch.nn.init.constant_(self.alphaBERT, 1.0)
            torch.nn.init.constant_(self.gammaBERT, 1.0)

        cdim = bert_dim
        x_input_size += bert_dim
        ques_input_size += bert_dim

    self.pre_align = Attention(vocab_dim, opt['prealign_hidden'],
                               correlation_func=3, do_similarity=True)
    x_input_size += vocab_dim

    pos_dim = opt['pos_dim']
    ent_dim = opt['ent_dim']
    self.pos_embedding = nn.Embedding(len(POS), pos_dim)
    self.ent_embedding = nn.Embedding(len(ENT), ent_dim)

    x_feat_len = 4
    if 'ANSWER_SPAN_IN_CONTEXT_FEATURE' in self.opt:
        print('ANSWER_SPAN_IN_CONTEXT_FEATURE')
        x_feat_len += 1

    x_input_size += pos_dim + ent_dim + x_feat_len
    print('Initially, the vector_sizes [doc, query] are', x_input_size, ques_input_size)

    additional_feat = cdim if self.use_contextual else 0

    # RNN context encoder
    self.context_rnn, context_rnn_output_size = RNN_from_opt(
        x_input_size, opt['hidden_size'],
        num_layers=opt['in_rnn_layers'],
        concat_rnn=opt['concat_rnn'],
        add_feat=additional_feat)
    # RNN question encoder
    self.ques_rnn, ques_rnn_output_size = RNN_from_opt(
        ques_input_size, opt['hidden_size'],
        num_layers=opt['in_rnn_layers'],
        concat_rnn=opt['concat_rnn'],
        add_feat=additional_feat)

    # Output sizes of rnn encoders
    print('After Input LSTM, the vector_sizes [doc, query] are [',
          context_rnn_output_size, ques_rnn_output_size, '] *', opt['in_rnn_layers'])

    # Deep inter-attention
    self.deep_attn = DeepAttention(
        opt, abstr_list_cnt=opt['in_rnn_layers'],
        deep_att_hidden_size_per_abstr=opt['deep_att_hidden_size_per_abstr'],
        correlation_func=3,
        word_hidden_size=vocab_dim + additional_feat)
    self.deep_attn_input_size = self.deep_attn.rnn_input_size
    self.deep_attn_output_size = self.deep_attn.output_size

    # Question understanding and compression
    self.high_lvl_ques_rnn, high_lvl_ques_rnn_output_size = RNN_from_opt(
        ques_rnn_output_size * opt['in_rnn_layers'],
        opt['highlvl_hidden_size'],
        num_layers=opt['question_high_lvl_rnn_layers'],
        concat_rnn=True)

    self.after_deep_attn_size = (self.deep_attn_output_size + self.deep_attn_input_size
                                 + additional_feat + vocab_dim)
    self.self_attn_input_size = self.after_deep_attn_size
    self_attn_output_size = self.deep_attn_output_size

    # Self attention on context
    self.highlvl_self_att = Attention(self.self_attn_input_size,
                                      opt['deep_att_hidden_size_per_abstr'],
                                      correlation_func=3)
    print('Self deep-attention input is {}-dim'.format(self.self_attn_input_size))

    self.high_lvl_context_rnn, high_lvl_context_rnn_output_size = RNN_from_opt(
        self.deep_attn_output_size + self_attn_output_size,
        opt['highlvl_hidden_size'],
        num_layers=1, concat_rnn=False)
    context_final_size = high_lvl_context_rnn_output_size

    print('Do Question self attention')
    self.ques_self_attn = Attention(high_lvl_ques_rnn_output_size,
                                    opt['query_self_attn_hidden_size'],
                                    correlation_func=3)
    ques_final_size = high_lvl_ques_rnn_output_size
    print('Before answer span finding, hidden size are', context_final_size, ques_final_size)

    # Question merging
    self.ques_merger = LinearSelfAttn(ques_final_size)
    self.get_answer = GetFinalScores(context_final_size, ques_final_size)
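# How the alphaBERT/gammaBERT parameters defined above are typically consumed:
# a softmax-normalized weighted sum over the per-layer BERT hidden states,
# scaled by gamma. This is a hedged sketch of that computation, not the
# model's actual forward code; layer_outputs is assumed to be a list of
# [batch, seq_len, bert_dim] tensors, one per BERT layer.
import torch
import torch.nn.functional as F

def bert_linear_combine(layer_outputs, alpha, gamma):
    weights = F.softmax(alpha, dim=0)                        # [num_layers], sums to 1
    stacked = torch.stack(layer_outputs, dim=0)              # [num_layers, B, T, D]
    combined = (weights.view(-1, 1, 1, 1) * stacked).sum(0)  # [B, T, D]
    return combined * gamma                                  # learned global scale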
def __init__(self, opt, embedding):
    super(SDNet, self).__init__()
    print('SDNet model\n')
    self.opt = opt
    self.vocab_dim = 300

    if 'PHOC' in self.opt:
        phoc_embedding = embedding['phoc_embedding']
    if 'FastText' in self.opt:
        fast_embedding = embedding['fast_embedding']
    if 'GLOVE' in self.opt:
        glove_embedding = embedding['glove_embedding']
    if 'ModelParallel' in self.opt:
        self.bert_cuda = 'cuda:{}'.format(self.opt['ModelParallel'][-1])
        self.main_cuda = 'cuda:{}'.format(self.opt['ModelParallel'][0])

    # self.position_dim = opt['position_dim']
    self.use_cuda = (self.opt['cuda'] == True)
    self.q_embedding = opt['q_embedding'].split(',')
    self.ocr_embedding = opt['ocr_embedding'].split(',')

    self.LN_flag = 'LN' in self.opt
    if self.LN_flag:
        log.info('Do Layer Normalization')
    else:
        log.info('Do not do Layer Normalization')

    set_dropout_prob(0.0 if 'DROPOUT' not in opt else float(opt['DROPOUT']))
    set_seq_dropout('VARIATIONAL_DROPOUT' in self.opt)

    x_input_size = 0
    ques_input_size = 0

    if 'PHOC' in self.opt:
        self.vocab_size = int(opt['vocab_size'])
        self.phoc_dim = int(opt['phoc_dim'])
        self.phoc_embed = nn.Embedding(self.vocab_size, self.phoc_dim, padding_idx=1)
        self.phoc_embed.weight.data = phoc_embedding

    if 'FastText' in self.opt:
        self.vocab_size = int(opt['vocab_size'])
        self.fast_dim = int(opt['fast_dim'])
        self.fast_embed = nn.Embedding(self.vocab_size, self.fast_dim, padding_idx=1)
        self.fast_embed.weight.data = fast_embedding

    if 'GLOVE' in self.opt:
        self.vocab_size = int(opt['vocab_size'])
        self.glove_dim = int(opt['glove_dim'])
        self.glove_embed = nn.Embedding(self.vocab_size, self.glove_dim, padding_idx=1)
        self.glove_embed.weight.data = glove_embedding

    x_input_size += self.glove_dim if 'glove' in self.ocr_embedding else 0
    ques_input_size += self.glove_dim if 'glove' in self.q_embedding else 0
    x_input_size += self.fast_dim if 'fasttext' in self.ocr_embedding else 0
    ques_input_size += self.fast_dim if 'fasttext' in self.q_embedding else 0
    x_input_size += self.phoc_dim if 'phoc' in self.ocr_embedding else 0
    ques_input_size += self.phoc_dim if 'phoc' in self.q_embedding else 0

    if 'TUNE_PARTIAL' in self.opt:
        print('TUNE_PARTIAL')
        if 'FastText' in self.opt:
            self.fixed_embedding_fast = fast_embedding[opt['tune_partial']:]
        if 'GLOVE' in self.opt:
            self.fixed_embedding_glove = glove_embedding[opt['tune_partial']:]
    else:
        if 'FastText' in self.opt:
            self.fast_embed.weight.requires_grad = False
        if 'GLOVE' in self.opt:
            self.glove_embed.weight.requires_grad = False

    if 'BERT' in self.opt:
        print('Using BERT')
        self.Bert = Bert(self.opt)
        if 'LOCK_BERT' in self.opt:
            print('Lock BERT\'s weights')
            for p in self.Bert.parameters():
                p.requires_grad = False
        if 'BERT_LARGE' in self.opt:
            print('BERT_LARGE')
            bert_dim = 1024
            bert_layers = 24
        else:
            bert_dim = 768
            bert_layers = 12
        print('BERT dim:', bert_dim, 'BERT_LAYERS:', bert_layers)

        if 'BERT_LINEAR_COMBINE' in self.opt:
            print('BERT_LINEAR_COMBINE')
            self.alphaBERT = nn.Parameter(torch.Tensor(bert_layers), requires_grad=True)
            self.gammaBERT = nn.Parameter(torch.Tensor(1, 1), requires_grad=True)
            torch.nn.init.constant_(self.alphaBERT, 1.0)
            torch.nn.init.constant_(self.gammaBERT, 1.0)

        cdim = bert_dim
        x_input_size += bert_dim if 'bert' in self.ocr_embedding or 'bert_only' in self.ocr_embedding else 0
        ques_input_size += bert_dim if 'bert' in self.q_embedding or 'bert_only' in self.q_embedding else 0

    if 'PRE_ALIGN' in self.opt:
        self.pre_align = Attention(self.vocab_dim, opt['prealign_hidden'],
                                   correlation_func=3, do_similarity=True)
        if 'PRE_ALIGN_befor_rnn' in self.opt:
            x_input_size += self.vocab_dim

    if 'pos' in self.q_embedding or 'pos' in self.ocr_embedding:
        pos_dim = opt['pos_dim']
        self.pos_embedding = nn.Embedding(len(POS), pos_dim)
        x_input_size += pos_dim if 'pos' in self.ocr_embedding else 0
        ques_input_size += pos_dim if 'pos' in self.q_embedding else 0

    if 'ent' in self.q_embedding or 'ent' in self.ocr_embedding:
        ent_dim = opt['ent_dim']
        self.ent_embedding = nn.Embedding(len(ENT), ent_dim)
        x_input_size += ent_dim if 'ent' in self.ocr_embedding else 0
        ques_input_size += ent_dim if 'ent' in self.q_embedding else 0

    print('Initially, the vector_sizes [ocr, query] are', x_input_size, ques_input_size)

    additional_feat = 0
    self.LN = 'LN' in opt

    self.multi2one, multi2one_output_size = RNN_from_opt(
        x_input_size, opt['multi2one_hidden_size'], num_layers=1,
        concat_rnn=opt['concat_rnn'], add_feat=additional_feat,
        bidirectional=self.opt['multi2one_bidir'])
    # if 'LN' in self.opt:
    #     self.ocr_input_ln = nn.LayerNorm([opt['batch_size'], opt['max_ocr_num'], multi2one_output_size])
    #     self.od_input_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], multi2one_output_size])
    self.multi2one_output_size = multi2one_output_size

    # RNN context encoder
    self.context_rnn, context_rnn_output_size = RNN_from_opt(
        multi2one_output_size, opt['hidden_size'],
        num_layers=opt['in_rnn_layers'], concat_rnn=opt['concat_rnn'],
        add_feat=additional_feat)
    # RNN question encoder
    self.ques_rnn, ques_rnn_output_size = RNN_from_opt(
        ques_input_size, opt['hidden_size'],
        num_layers=opt['in_rnn_layers'], concat_rnn=opt['concat_rnn'],
        add_feat=additional_feat)
    # if 'LN' in self.opt:
    #     self.ocr_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_ocr_num'], context_rnn_output_size])
    #     self.od_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], context_rnn_output_size])
    #     self.q_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], ques_rnn_output_size])

    # Output sizes of rnn encoders
    print('After Input LSTM, the vector_sizes [doc, query] are [',
          context_rnn_output_size, ques_rnn_output_size, '] *', opt['in_rnn_layers'])

    # Deep inter-attention
    if ('GLOVE' not in self.opt) and ('FastText' not in self.opt):
        _word_hidden_size = 0
    else:
        _word_hidden_size = multi2one_output_size + additional_feat

    self.deep_attn = DeepAttention(
        opt, abstr_list_cnt=opt['in_rnn_layers'],
        deep_att_hidden_size_per_abstr=opt['deep_att_hidden_size_per_abstr'],
        correlation_func=3, word_hidden_size=_word_hidden_size)
    self.deep_attn_input_size = self.deep_attn.rnn_input_size
    self.deep_attn_output_size = self.deep_attn.output_size
    print('Deep Attention: input: {}, hidden input: {}, output: {}'.format(
        self.deep_attn.att_size, self.deep_attn_input_size, self.deep_attn_output_size))

    # Question understanding and compression
    self.high_lvl_ques_rnn, high_lvl_ques_rnn_output_size = RNN_from_opt(
        ques_rnn_output_size * opt['in_rnn_layers'],
        opt['highlvl_hidden_size'],
        num_layers=opt['question_high_lvl_rnn_layers'],
        concat_rnn=True)

    self.after_deep_attn_size = (self.deep_attn_output_size + self.deep_attn_input_size
                                 + additional_feat + multi2one_output_size)
    self.self_attn_input_size = self.after_deep_attn_size

    # Self attention on context
    if 'no_Context_Self_Attention' in self.opt:
        print('no self attention on context')
        self_attn_output_size = 0
    else:
        self.highlvl_self_att = Attention(self.self_attn_input_size,
                                          opt['deep_att_hidden_size_per_abstr'],
                                          correlation_func=3)
        self_attn_output_size = self.deep_attn_output_size
        print('Self deep-attention input is {}-dim'.format(self.self_attn_input_size))

    self.high_lvl_context_rnn, high_lvl_context_rnn_output_size = RNN_from_opt(
        self.deep_attn_output_size + self_attn_output_size,
        opt['highlvl_hidden_size'], num_layers=1, concat_rnn=False)
    context_final_size = high_lvl_context_rnn_output_size
    # if 'LN' in self.opt:
    #     self.ocr_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_ocr_num'], high_lvl_context_rnn_output_size])
    #     self.od_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], high_lvl_context_rnn_output_size])
    #     self.q_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], high_lvl_ques_rnn_output_size])

    print('Do Question self attention')
    self.ques_self_attn = Attention(high_lvl_ques_rnn_output_size,
                                    opt['query_self_attn_hidden_size'],
                                    correlation_func=3)
    ques_final_size = high_lvl_ques_rnn_output_size
    print('Before answer span finding, hidden size are', context_final_size, ques_final_size)

    if 'position_dim' in self.opt:
        if self.opt['position_mod'] == 'qk+':
            self.od_ocr_attn = Attention(context_final_size, opt['hidden_size'],
                                         correlation_func=3, do_similarity=True)
            self.position_attn = Attention(self.opt['position_dim'], opt['hidden_size'],
                                           correlation_func=3, do_similarity=True)
            position_att_output_size = context_final_size
        elif self.opt['position_mod'] == 'cat':
            self.od_ocr_attn = Attention(context_final_size + self.opt['position_dim'],
                                         opt['hidden_size'],
                                         correlation_func=3, do_similarity=True)
            position_att_output_size = context_final_size + self.opt['position_dim']

    # Question merging
    self.ques_merger = LinearSelfAttn(ques_final_size)

    if self.opt['pos_att_merge_mod'] == 'cat':
        ocr_final_size = context_final_size + position_att_output_size
        # self.get_answer = GetFinalScores(context_final_size + position_att_output_size, ques_final_size)
    elif self.opt['pos_att_merge_mod'] == 'atted':
        ocr_final_size = position_att_output_size
        # self.get_answer = GetFinalScores(position_att_output_size, ques_final_size)
    elif self.opt['pos_att_merge_mod'] == 'original':
        ocr_final_size = context_final_size
        # self.get_answer = GetFinalScores(context_final_size, ques_final_size)

    if 'img_feature' in self.opt:
        if self.opt['img_fea_way'] == 'replace_od':
            self.img_fea_num = self.opt['img_fea_num']
            self.img_fea_dim = self.opt['img_fea_dim']
            self.img_spa_dim = self.opt['img_spa_dim']
            self.img_fea2od = nn.Linear(self.opt['img_fea_dim'], multi2one_output_size)
            # self.pro_que_rnn, pro_que_rnn_output_size = RNN_from_opt(ques_input_size, multi2one_output_size//2)
            # assert pro_que_rnn_output_size == multi2one_output_size
            # ques_input_size = multi2one_output_size
        elif self.opt['img_fea_way'] == 'final_att':
            self.img_fea_num = self.opt['img_fea_num']
            self.img_fea_dim = self.opt['img_fea_dim']
            self.img_spa_dim = self.opt['img_spa_dim']
            self.image_feature_model = Image_feature_model(ques_final_size, self.img_fea_dim)
            self.ocr_final_model = Image_feature_model(ques_final_size, ocr_final_size)
            self.fixed_ocr_alpha = nn.Parameter(torch.Tensor(1, 1), requires_grad=True)
            torch.nn.init.constant_(self.fixed_ocr_alpha, 0.5)
            ques_final_size += ques_final_size * 2
        else:
            assert False

    self.get_answer = GetFinalScores(ocr_final_size, ques_final_size,
                                     yesno='label_yesno' in self.opt,
                                     no_answer='label_no_answer' in self.opt,
                                     useES='useES' in self.opt)

    if 'fixed_answers' in self.opt:
        self.fixed_ans_classifier = Fixed_answers_predictor(ques_final_size,
                                                            self.opt['fixed_answers_len'])

    if 'ES_ocr' in self.opt and self.opt['ES_using_way'] == 'post_process':
        self.ES_linear = nn.Linear(multi2one_output_size, ocr_final_size)
        self.ES_ocr_att = Attention(ocr_final_size, opt['hidden_size'],
                                    correlation_func=3, do_similarity=True)
    # elif self.opt['ES_using_way'] == 'as_ocr':

    log.debug('Network build succeeded')
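# The x_input_size/ques_input_size bookkeeping above follows one pattern: a
# feature contributes its dimension only when its name appears in the
# comma-separated q_embedding/ocr_embedding option string. A condensed sketch
# of that pattern (hypothetical helper; the dims below are illustrative, not
# values from the original config):
def active_input_size(active_csv: str, dims: dict) -> int:
    active = active_csv.split(',')
    return sum(dim for name, dim in dims.items() if name in active)

dims = {'glove': 300, 'fasttext': 300, 'phoc': 604, 'bert': 768}
assert active_input_size('glove,phoc', dims) == 904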
def __init__(self, opt, word_embedding):
    # word_embedding holds the GloVe vectors for the vocabulary SDNet builds,
    # used to initialize the embedding layer weights
    super(SDNet, self).__init__()
    print('SDNet model\n')
    self.opt = opt
    set_dropout_prob(0. if 'DROPOUT' not in opt else float(opt['DROPOUT']))  # set the dropout rate
    set_seq_dropout('VARIATIONAL_DROPOUT' in self.opt)

    x_input_size = 0     # running total of feature dimensions for passage words (x)
    ques_input_size = 0  # running total of feature dimensions for question words (ques)

    self.vocab_size = int(opt['vocab_size'])  # vocabulary size
    vocab_dim = int(opt['vocab_dim'])         # GloVe embedding dimension
    self.vocab_embed = nn.Embedding(num_embeddings=self.vocab_size,
                                    embedding_dim=vocab_dim, padding_idx=1)
    self.vocab_embed.weight.data = word_embedding  # initialize embedding weights with GloVe
    x_input_size += vocab_dim
    ques_input_size += vocab_dim

    if 'CHAR_CNN' in self.opt:
        print('CHAR_CNN')
        char_vocab_size = int(opt['char_vocab_size'])
        char_dim = int(opt['char_emb_size'])
        char_hidden_size = int(opt['char_hidden_size'])
        self.char_embed = nn.Embedding(num_embeddings=char_vocab_size,
                                       embedding_dim=char_dim, padding_idx=1)
        self.char_cnn = CNN(input_size=char_dim, window_size=3, output_size=char_hidden_size)
        self.maxpooling = MaxPooling()
        x_input_size += char_hidden_size
        ques_input_size += char_hidden_size

    if 'TUNE_PARTIAL' in self.opt:
        print('TUNE_PARTIAL')
        self.fixed_embedding = word_embedding[opt['tune_partial']:]
    else:
        self.vocab_embed.weight.requires_grad = False

    cdim = 0
    self.use_contextual = False

    if 'BERT' in self.opt:
        print('Using BERT')
        self.Bert = Bert(self.opt)
        if 'LOCK_BERT' in self.opt:
            print('Lock BERT\'s weights')
            for p in self.Bert.parameters():  # freeze BERT weights so they are not updated
                p.requires_grad = False
        if 'BERT_LARGE' in self.opt:
            print('BERT_LARGE')
            bert_dim = 1024
            bert_layers = 24
        else:
            bert_dim = 768
            bert_layers = 12
        print('BERT dim:', bert_dim, 'BERT_LAYERS:', bert_layers)

        if 'BERT_LINEAR_COMBINE' in self.opt:
            # to take a weighted sum of each BERT layer's output, define the
            # weights alpha and gamma
            print('BERT_LINEAR_COMBINE')
            self.alphaBERT = nn.Parameter(torch.Tensor(bert_layers), requires_grad=True)
            self.gammaBERT = nn.Parameter(torch.Tensor(1, 1), requires_grad=True)
            torch.nn.init.constant_(self.alphaBERT, 1.0)
            torch.nn.init.constant_(self.gammaBERT, 1.0)

        cdim = bert_dim
        x_input_size += bert_dim
        ques_input_size += bert_dim

    # word-level attention layer
    self.pre_align = Attention(input_size=vocab_dim, hidden_size=opt['prealign_hidden'],
                               correlation_func=3, do_similarity=True)
    x_input_size += vocab_dim

    # part-of-speech and named-entity tag embeddings
    pos_dim = opt['pos_dim']
    ent_dim = opt['ent_dim']
    self.pos_embedding = nn.Embedding(num_embeddings=len(POS), embedding_dim=pos_dim)
    self.ent_embedding = nn.Embedding(num_embeddings=len(ENT), embedding_dim=ent_dim)

    # 4-dimensional features for passage words, including term frequency and exact match
    x_feat_len = 4
    if 'ANSWER_SPAN_IN_CONTEXT_FEATURE' in self.opt:
        print('ANSWER_SPAN_IN_CONTEXT_FEATURE')
        x_feat_len += 1

    x_input_size += pos_dim + ent_dim + x_feat_len
    print('Initially, the vector_sizes [doc, query] are', x_input_size, ques_input_size)

    additional_feat = cdim if self.use_contextual else 0

    # passage RNN layer
    self.context_rnn, context_rnn_output_size = RNN_from_opt(
        input_size_=x_input_size, hidden_size_=opt['hidden_size'],
        num_layers=opt['in_rnn_layers'], concat_rnn=opt['concat_rnn'],
        add_feat=additional_feat)
    # question RNN layer
    self.ques_rnn, ques_rnn_output_size = RNN_from_opt(
        input_size_=ques_input_size, hidden_size_=opt['hidden_size'],
        num_layers=opt['in_rnn_layers'], concat_rnn=opt['concat_rnn'],
        add_feat=additional_feat)

    # output sizes of the RNN layers
    print('After Input LSTM, the vector_sizes [doc, query] are [',
          context_rnn_output_size, ques_rnn_output_size, '] * ', opt['in_rnn_layers'])

    # fully-aware inter-attention
    self.deep_attn = DeepAttention(
        opt=opt, abstr_list_cnt=opt['in_rnn_layers'],
        deep_att_hidden_size_per_abstr=opt['deep_att_hidden_size_per_abstr'],
        correlation_func=3, word_hidden_size=vocab_dim + additional_feat)
    self.deep_attn_input_size = self.deep_attn.rnn_input_size
    self.deep_attn_output_size = self.deep_attn.output_size

    # question understanding layer
    self.high_lvl_ques_rnn, high_lvl_ques_rnn_output_size = RNN_from_opt(
        input_size_=ques_rnn_output_size * opt['in_rnn_layers'],
        hidden_size_=opt['highlvl_hidden_size'],
        num_layers=opt['question_high_lvl_rnn_layers'], concat_rnn=True)

    # total dimension of the passage words' history-of-word vectors so far
    self.after_deep_attn_size = (self.deep_attn_output_size + self.deep_attn_input_size
                                 + additional_feat + vocab_dim)
    self.self_attn_input_size = self.after_deep_attn_size
    self_attn_output_size = self.deep_attn_output_size

    # passage word self-attention layer
    self.highlvl_self_attn = Attention(input_size=self.self_attn_input_size,
                                       hidden_size=opt['deep_att_hidden_size_per_abstr'],
                                       correlation_func=3)
    print('Self deep-attention input is {}-dim'.format(self.self_attn_input_size))

    # high-level passage RNN layer
    self.high_lvl_context_rnn, high_lvl_context_rnn_output_size = RNN_from_opt(
        input_size_=self.deep_attn_output_size + self_attn_output_size,
        hidden_size_=opt['highlvl_hidden_size'], num_layers=1, concat_rnn=False)
    # final passage word dimension
    context_final_size = high_lvl_context_rnn_output_size

    # question self-attention layer
    print('Do Question self attention')
    self.ques_self_attn = Attention(input_size=high_lvl_ques_rnn_output_size,
                                    hidden_size=opt['query_self_attn_hidden_size'],
                                    correlation_func=3)
    # final question word dimension
    ques_final_size = high_lvl_ques_rnn_output_size
    print('Before answer span finding, hidden size are', context_final_size, ques_final_size)

    # linear self-attention layer producing a single question vector
    self.ques_merger = LinearSelfAttn(input_size=ques_final_size)
    # score output layer
    self.get_answer = GetFinalScores(x_size=context_final_size, h_size=ques_final_size)
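# Worked example of the size bookkeeping above (illustrative option values,
# not the original config): with 300-dim GloVe, no CHAR_CNN and no BERT,
# pos_dim=12, ent_dim=8 and the default 4 handcrafted features, the widths
# printed by the constructor come out as:
vocab_dim, pos_dim, ent_dim, x_feat_len = 300, 12, 8, 4
x_input_size = vocab_dim + vocab_dim + pos_dim + ent_dim + x_feat_len  # GloVe + pre-align + tags + features
ques_input_size = vocab_dim                                            # question side uses GloVe only
assert (x_input_size, ques_input_size) == (624, 300)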