Example no. 1
0
    def __init__(self, opt):
        super(SDNetTrainer, self).__init__(opt)
        print('SDNet Model Trainer')
        set_dropout_prob(0.0 if 'DROPOUT' not in opt else float(opt['DROPOUT']))
        self.seed = int(opt['SEED'])
        self.data_prefix = 'coqa-'
        random.seed(self.seed)
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        self.preproc = CoQAPreprocess(self.opt)
        if self.use_cuda:
            torch.cuda.manual_seed_all(self.seed)
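For reference, a minimal sketch of the kind of opt dictionary this trainer reads from; the keys mirror those used in the constructor above, while the values and the availability of SDNetTrainer in the current scope are illustrative assumptions:

    # Illustrative only: keys mirror those read in the constructor above, values are made up.
    opt = {
        'SEED': 1023,     # seed applied to random, numpy and torch
        'DROPOUT': 0.3,   # probability handed to set_dropout_prob
        'cuda': True,     # consulted (as self.use_cuda) before seeding CUDA
    }
    trainer = SDNetTrainer(opt)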
Example no. 2
0
    def __init__(self, opt):
        super(ConvQA_CN_NetTrainer, self).__init__(opt)
        print('Model Trainer')
        set_dropout_prob(0.0 if 'DROPOUT' not in opt else float(opt['DROPOUT']))
        self.seed = int(opt['SEED'])
        self.opt = opt
        random.seed(self.seed)
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        if self.opt['dataset'] == 'quac':
            self.data_prefix = 'quac-'
            self.preproc = QuACPreprocess(self.opt)
        if self.use_cuda:
            torch.cuda.manual_seed_all(self.seed)

        ### seq2seq
        self.train_lang, self.dev_lang = dataprocess("train", "dev")
        self.opt['train_words'] = self.train_lang.n_words
        self.opt['dev_words'] = self.dev_lang.n_words
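The train_lang / dev_lang objects returned by dataprocess are only used here for their word counts (n_words). A hypothetical minimal stand-in that satisfies this interface (the class and its fields are assumptions for illustration, not the project's real implementation):

    # Hypothetical stand-in for the vocabulary objects returned by dataprocess().
    class Lang:
        def __init__(self, sentences):
            self.word2index = {}
            for sentence in sentences:
                for word in sentence.split():
                    self.word2index.setdefault(word, len(self.word2index))
            self.n_words = len(self.word2index)  # consumed as opt['train_words'] / opt['dev_words']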
Example no. 3
0
    def __init__(self, opt, word_embedding):
        super(SDNet, self).__init__()
        print('SDNet model\n')

        self.opt = opt
        self.use_cuda = (self.opt['cuda'] == True)
        set_dropout_prob(
            0.0 if 'DROPOUT' not in opt else float(opt['DROPOUT']))
        set_seq_dropout('VARIATIONAL_DROPOUT' in self.opt)

        x_input_size = 0
        ques_input_size = 0

        self.vocab_size = int(opt['vocab_size'])
        vocab_dim = int(opt['vocab_dim'])
        self.vocab_embed = nn.Embedding(self.vocab_size,
                                        vocab_dim,
                                        padding_idx=1)
        self.vocab_embed.weight.data = word_embedding

        x_input_size += vocab_dim
        ques_input_size += vocab_dim

        if 'CHAR_CNN' in self.opt:
            print('CHAR_CNN')
            char_vocab_size = int(opt['char_vocab_size'])
            char_dim = int(opt['char_emb_size'])
            char_hidden_size = int(opt['char_hidden_size'])
            self.char_embed = nn.Embedding(char_vocab_size,
                                           char_dim,
                                           padding_idx=1)
            self.char_cnn = CNN(char_dim, 3, char_hidden_size)
            self.maxpooling = MaxPooling()
            x_input_size += char_hidden_size
            ques_input_size += char_hidden_size

        if 'TUNE_PARTIAL' in self.opt:
            print('TUNE_PARTIAL')
            self.fixed_embedding = word_embedding[opt['tune_partial']:]
        else:
            self.vocab_embed.weight.requires_grad = False

        cdim = 0
        self.use_contextual = False

        if 'BERT' in self.opt:
            print('Using BERT')
            self.Bert = Bert(self.opt)
            if 'LOCK_BERT' in self.opt:
                print('Lock BERT\'s weights')
                for p in self.Bert.parameters():
                    p.requires_grad = False
            if 'BERT_LARGE' in self.opt:
                print('BERT_LARGE')
                bert_dim = 1024
                bert_layers = 24
            else:
                bert_dim = 768
                bert_layers = 12

            print('BERT dim:', bert_dim, 'BERT_LAYERS:', bert_layers)

            if 'BERT_LINEAR_COMBINE' in self.opt:
                print('BERT_LINEAR_COMBINE')
                self.alphaBERT = nn.Parameter(torch.Tensor(bert_layers),
                                              requires_grad=True)
                self.gammaBERT = nn.Parameter(torch.Tensor(1, 1),
                                              requires_grad=True)
                torch.nn.init.constant_(self.alphaBERT, 1.0)
                torch.nn.init.constant_(self.gammaBERT, 1.0)

            cdim = bert_dim
            x_input_size += bert_dim
            ques_input_size += bert_dim

        self.pre_align = Attention(vocab_dim,
                                   opt['prealign_hidden'],
                                   correlation_func=3,
                                   do_similarity=True)
        x_input_size += vocab_dim

        pos_dim = opt['pos_dim']
        ent_dim = opt['ent_dim']
        self.pos_embedding = nn.Embedding(len(POS), pos_dim)
        self.ent_embedding = nn.Embedding(len(ENT), ent_dim)

        x_feat_len = 4
        if 'ANSWER_SPAN_IN_CONTEXT_FEATURE' in self.opt:
            print('ANSWER_SPAN_IN_CONTEXT_FEATURE')
            x_feat_len += 1

        x_input_size += pos_dim + ent_dim + x_feat_len

        print('Initially, the vector_sizes [doc, query] are', x_input_size,
              ques_input_size)

        addtional_feat = cdim if self.use_contextual else 0

        # RNN context encoder
        self.context_rnn, context_rnn_output_size = RNN_from_opt(
            x_input_size,
            opt['hidden_size'],
            num_layers=opt['in_rnn_layers'],
            concat_rnn=opt['concat_rnn'],
            add_feat=addtional_feat)
        # RNN question encoder
        self.ques_rnn, ques_rnn_output_size = RNN_from_opt(
            ques_input_size,
            opt['hidden_size'],
            num_layers=opt['in_rnn_layers'],
            concat_rnn=opt['concat_rnn'],
            add_feat=addtional_feat)

        # Output sizes of rnn encoders
        print('After Input LSTM, the vector_sizes [doc, query] are [',
              context_rnn_output_size, ques_rnn_output_size, '] *',
              opt['in_rnn_layers'])

        # Deep inter-attention
        self.deep_attn = DeepAttention(opt,
                                       abstr_list_cnt=opt['in_rnn_layers'],
                                       deep_att_hidden_size_per_abstr=opt[
                                           'deep_att_hidden_size_per_abstr'],
                                       correlation_func=3,
                                       word_hidden_size=vocab_dim +
                                       addtional_feat)
        self.deep_attn_input_size = self.deep_attn.rnn_input_size
        self.deep_attn_output_size = self.deep_attn.output_size

        # Question understanding and compression
        self.high_lvl_ques_rnn, high_lvl_ques_rnn_output_size = RNN_from_opt(
            ques_rnn_output_size * opt['in_rnn_layers'],
            opt['highlvl_hidden_size'],
            num_layers=opt['question_high_lvl_rnn_layers'],
            concat_rnn=True)

        self.after_deep_attn_size = self.deep_attn_output_size + self.deep_attn_input_size + addtional_feat + vocab_dim
        self.self_attn_input_size = self.after_deep_attn_size
        self_attn_output_size = self.deep_attn_output_size

        # Self attention on context
        self.highlvl_self_att = Attention(
            self.self_attn_input_size,
            opt['deep_att_hidden_size_per_abstr'],
            correlation_func=3)
        print('Self deep-attention input is {}-dim'.format(
            self.self_attn_input_size))

        self.high_lvl_context_rnn, high_lvl_context_rnn_output_size = RNN_from_opt(
            self.deep_attn_output_size + self_attn_output_size,
            opt['highlvl_hidden_size'],
            num_layers=1,
            concat_rnn=False)
        context_final_size = high_lvl_context_rnn_output_size

        print('Do Question self attention')
        self.ques_self_attn = Attention(high_lvl_ques_rnn_output_size,
                                        opt['query_self_attn_hidden_size'],
                                        correlation_func=3)

        ques_final_size = high_lvl_ques_rnn_output_size
        print('Before answer span finding, hidden size are',
              context_final_size, ques_final_size)

        # Question merging
        self.ques_merger = LinearSelfAttn(ques_final_size)
        self.get_answer = GetFinalScores(context_final_size, ques_final_size)
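When BERT_LINEAR_COMBINE is enabled, alphaBERT and gammaBERT are meant to weight the per-layer BERT encodings. A minimal sketch of how such a combination is typically computed (the method name and the softmax normalization are assumptions, not taken from the snippet):

    def combine_bert_layers(self, layer_outputs):
        # layer_outputs: one tensor per BERT layer, len(layer_outputs) == bert_layers
        alpha = torch.softmax(self.alphaBERT, dim=0)             # one weight per layer
        combined = sum(a * h for a, h in zip(alpha, layer_outputs))
        return combined * self.gammaBERT                         # learned global scale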
Example no. 4
0
    def __init__(self, opt, embedding):
        super(SDNet, self).__init__()
        print('SDNet model\n')
        self.opt = opt
        self.vocab_dim = 300
        if 'PHOC' in self.opt:
            phoc_embedding = embedding['phoc_embedding']
        if 'FastText' in self.opt:
            fast_embedding = embedding['fast_embedding']
        if 'GLOVE' in self.opt:
            glove_embedding = embedding['glove_embedding']
        if 'ModelParallel' in self.opt:
            self.bert_cuda = 'cuda:{}'.format(self.opt['ModelParallel'][-1])
            self.main_cuda = 'cuda:{}'.format(self.opt['ModelParallel'][0])
        #self.position_dim = opt['position_dim']
        self.use_cuda = (self.opt['cuda'] == True)
        self.q_embedding = opt['q_embedding'].split(',')
        self.ocr_embedding = opt['ocr_embedding'].split(',')
        self.LN_flag = 'LN' in self.opt
        if self.LN_flag:
            log.info('Do Layer Normalization')
        else:
            log.info('Do not do Layer Normalization')

        set_dropout_prob(0.0 if 'DROPOUT' not in opt else float(opt['DROPOUT']))
        set_seq_dropout('VARIATIONAL_DROPOUT' in self.opt)

        x_input_size = 0
        ques_input_size = 0

        if 'PHOC' in self.opt:
            self.vocab_size = int(opt['vocab_size'])
            self.phoc_dim = int(opt['phoc_dim'])
            self.phoc_embed = nn.Embedding(self.vocab_size, self.phoc_dim, padding_idx = 1)
            self.phoc_embed.weight.data = phoc_embedding

        if 'FastText' in self.opt:
            self.vocab_size = int(opt['vocab_size'])
            self.fast_dim = int(opt['fast_dim'])
            self.fast_embed = nn.Embedding(self.vocab_size, self.fast_dim, padding_idx = 1)
            self.fast_embed.weight.data = fast_embedding

        if 'GLOVE' in self.opt:
            self.vocab_size = int(opt['vocab_size'])
            self.glove_dim = int(opt['glove_dim'])
            self.glove_embed = nn.Embedding(self.vocab_size, self.glove_dim, padding_idx = 1)
            self.glove_embed.weight.data = glove_embedding

        x_input_size += self.glove_dim if 'glove' in self.ocr_embedding else 0
        ques_input_size += self.glove_dim if 'glove' in self.q_embedding else 0
        x_input_size += self.fast_dim if 'fasttext' in self.ocr_embedding else 0
        ques_input_size += self.fast_dim if 'fasttext' in self.q_embedding else 0
        x_input_size += self.phoc_dim if 'phoc' in self.ocr_embedding else 0
        ques_input_size += self.phoc_dim if 'phoc' in self.q_embedding else 0

        if 'TUNE_PARTIAL' in self.opt:
            print('TUNE_PARTIAL')
            if 'FastText' in self.opt:
                self.fixed_embedding_fast = fast_embedding[opt['tune_partial']:]
            if 'GLOVE' in self.opt:
                self.fixed_embedding_glove = glove_embedding[opt['tune_partial']:]
        else:
            if 'FastText' in self.opt:
                self.fast_embed.weight.requires_grad = False
            if 'GLOVE' in self.opt:
                self.glove_embed.weight.requires_grad = False

        if 'BERT' in self.opt:
            print('Using BERT')
            self.Bert = Bert(self.opt)
            if 'LOCK_BERT' in self.opt:
                print('Lock BERT\'s weights')
                for p in self.Bert.parameters():
                    p.requires_grad = False
            if 'BERT_LARGE' in self.opt:
                print('BERT_LARGE')
                bert_dim = 1024
                bert_layers = 24
            else:
                bert_dim = 768
                bert_layers = 12

            print('BERT dim:', bert_dim, 'BERT_LAYERS:', bert_layers)    

            if 'BERT_LINEAR_COMBINE' in self.opt:
                print('BERT_LINEAR_COMBINE')
                self.alphaBERT = nn.Parameter(torch.Tensor(bert_layers), requires_grad=True)
                self.gammaBERT = nn.Parameter(torch.Tensor(1, 1), requires_grad=True)
                torch.nn.init.constant_(self.alphaBERT, 1.0)
                torch.nn.init.constant_(self.gammaBERT, 1.0)
                
            cdim = bert_dim
            x_input_size += bert_dim if 'bert' in self.ocr_embedding or 'bert_only' in self.ocr_embedding else 0
            ques_input_size += bert_dim if 'bert' in self.q_embedding or 'bert_only' in self.q_embedding else 0

        if 'PRE_ALIGN' in self.opt:
            self.pre_align = Attention(self.vocab_dim, opt['prealign_hidden'], correlation_func = 3, do_similarity = True)
            if 'PRE_ALIGN_befor_rnn' in self.opt:
                x_input_size += self.vocab_dim

        if 'pos' in self.q_embedding or 'pos' in self.ocr_embedding:
            pos_dim = opt['pos_dim']
            self.pos_embedding = nn.Embedding(len(POS), pos_dim)
            x_input_size += pos_dim if 'pos' in self.ocr_embedding else 0
            ques_input_size += pos_dim if 'pos' in self.q_embedding else 0
        if 'ent' in self.q_embedding or 'ent' in self.ocr_embedding:
            ent_dim = opt['ent_dim']
            self.ent_embedding = nn.Embedding(len(ENT), ent_dim)
            x_input_size += ent_dim if 'ent' in self.ocr_embedding else 0
            ques_input_size += ent_dim if 'ent' in self.q_embedding else 0
        

        print('Initially, the vector_sizes [ocr, query] are', x_input_size, ques_input_size)
        addtional_feat = 0
        self.LN = 'LN' in opt

        self.multi2one, multi2one_output_size = RNN_from_opt(x_input_size, opt['multi2one_hidden_size'],num_layers=1, concat_rnn=opt['concat_rnn'], add_feat=addtional_feat, bidirectional=self.opt['multi2one_bidir'])

        # if 'LN' in self.opt:
        #     self.ocr_input_ln = nn.LayerNorm([opt['batch_size'], opt['max_ocr_num'], multi2one_output_size])
        #     self.od_input_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], multi2one_output_size])
        
        self.multi2one_output_size = multi2one_output_size

        # RNN context encoder
        
        self.context_rnn, context_rnn_output_size = RNN_from_opt(multi2one_output_size, opt['hidden_size'], num_layers=opt['in_rnn_layers'], concat_rnn=opt['concat_rnn'], add_feat=addtional_feat)
        
        # RNN question encoder
        self.ques_rnn, ques_rnn_output_size = RNN_from_opt(ques_input_size, opt['hidden_size'], num_layers=opt['in_rnn_layers'], concat_rnn=opt['concat_rnn'], add_feat=addtional_feat)
        # if 'LN' in self.opt:
        #     self.ocr_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_ocr_num'], context_rnn_output_size])
        #     self.od_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], context_rnn_output_size])
        #     self.q_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], ques_rnn_output_size])

        # Output sizes of rnn encoders
        print('After Input LSTM, the vector_sizes [doc, query] are [', context_rnn_output_size, ques_rnn_output_size, '] *', opt['in_rnn_layers'])

        # Deep inter-attention
        if ('GLOVE' not in self.opt) and ('FastText' not in self.opt):
            _word_hidden_size = 0
        else:
            _word_hidden_size = multi2one_output_size + addtional_feat

        self.deep_attn = DeepAttention(opt, abstr_list_cnt=opt['in_rnn_layers'], deep_att_hidden_size_per_abstr=opt['deep_att_hidden_size_per_abstr'], correlation_func=3, word_hidden_size=_word_hidden_size)

        self.deep_attn_input_size = self.deep_attn.rnn_input_size
        self.deep_attn_output_size = self.deep_attn.output_size
        print('Deep Attention: input: {}, hidden input: {}, output: {}'.format(self.deep_attn.att_size, self.deep_attn_input_size, self.deep_attn_output_size))

        # Question understanding and compression
        self.high_lvl_ques_rnn , high_lvl_ques_rnn_output_size = RNN_from_opt(ques_rnn_output_size * opt['in_rnn_layers'], opt['highlvl_hidden_size'], num_layers = opt['question_high_lvl_rnn_layers'], concat_rnn = True)


        self.after_deep_attn_size = self.deep_attn_output_size + self.deep_attn_input_size + addtional_feat + multi2one_output_size
        self.self_attn_input_size = self.after_deep_attn_size
                

        # Self attention on context
        if 'no_Context_Self_Attention' in self.opt:
            print('no self attention on context')
            self_attn_output_size = 0
        else:
            self.highlvl_self_att = Attention(self.self_attn_input_size, opt['deep_att_hidden_size_per_abstr'], correlation_func=3)
            self_attn_output_size = self.deep_attn_output_size
            print('Self deep-attention input is {}-dim'.format(self.self_attn_input_size))

        self.high_lvl_context_rnn, high_lvl_context_rnn_output_size = RNN_from_opt(self.deep_attn_output_size + self_attn_output_size, opt['highlvl_hidden_size'], num_layers = 1, concat_rnn = False)
        context_final_size = high_lvl_context_rnn_output_size
        # if 'LN' in self.opt:
        #     self.ocr_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_ocr_num'], high_lvl_context_rnn_output_size])
        #     self.od_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], high_lvl_context_rnn_output_size])
        #     self.q_rnn1_ln = nn.LayerNorm([opt['batch_size'], opt['max_od_num'], high_lvl_ques_rnn_output_size])

        print('Do Question self attention')
        self.ques_self_attn = Attention(high_lvl_ques_rnn_output_size, opt['query_self_attn_hidden_size'], correlation_func=3)
        
        ques_final_size = high_lvl_ques_rnn_output_size
        print('Before answer span finding, hidden size are', context_final_size, ques_final_size)

        
        if 'position_dim' in self.opt:
            if self.opt['position_mod'] == 'qk+':
                self.od_ocr_attn = Attention(context_final_size, opt['hidden_size'], correlation_func = 3, do_similarity = True)
                self.position_attn = Attention(self.opt['position_dim'], opt['hidden_size'], correlation_func = 3, do_similarity = True)
                position_att_output_size = context_final_size
            elif self.opt['position_mod'] == 'cat':
                self.od_ocr_attn = Attention(context_final_size+self.opt['position_dim'], opt['hidden_size'], correlation_func = 3, do_similarity = True)
                position_att_output_size = context_final_size + self.opt['position_dim']
        # Question merging
        self.ques_merger = LinearSelfAttn(ques_final_size)
        if self.opt['pos_att_merge_mod'] == 'cat':
            ocr_final_size = context_final_size + position_att_output_size
            # self.get_answer = GetFinalScores(context_final_size + position_att_output_size, ques_final_size)
        elif self.opt['pos_att_merge_mod'] == 'atted':
            ocr_final_size = position_att_output_size
            # self.get_answer = GetFinalScores(position_att_output_size, ques_final_size)
        elif self.opt['pos_att_merge_mod'] == 'original':
            ocr_final_size = context_final_size
            # self.get_answer = GetFinalScores(context_final_size, ques_final_size)
        if 'img_feature' in self.opt:
            if self.opt['img_fea_way'] == 'replace_od':
                self.img_fea_num = self.opt['img_fea_num']
                self.img_fea_dim = self.opt['img_fea_dim']
                self.img_spa_dim = self.opt['img_spa_dim']
                self.img_fea2od = nn.Linear(self.opt['img_fea_dim'], multi2one_output_size)
                # self.pro_que_rnn, pro_que_rnn_output_size = RNN_from_opt(ques_input_size, multi2one_output_size//2)
                # assert pro_que_rnn_output_size == multi2one_output_size
                # ques_input_size = multi2one_output_size
            elif self.opt['img_fea_way'] == 'final_att':
                self.img_fea_num = self.opt['img_fea_num']
                self.img_fea_dim = self.opt['img_fea_dim']
                self.img_spa_dim = self.opt['img_spa_dim']
                self.image_feature_model = Image_feature_model(ques_final_size, self.img_fea_dim)
                self.ocr_final_model = Image_feature_model(ques_final_size, ocr_final_size)
                self.fixed_ocr_alpha = nn.Parameter(torch.Tensor(1, 1), requires_grad=True)
                torch.nn.init.constant_(self.fixed_ocr_alpha, 0.5)
                ques_final_size += ques_final_size * 2
            else:
                assert False

        self.get_answer = GetFinalScores(ocr_final_size, ques_final_size, yesno='label_yesno' in self.opt, no_answer='label_no_answer' in self.opt, useES='useES' in self.opt)
        if 'fixed_answers' in self.opt:
            self.fixed_ans_classifier = Fixed_answers_predictor(ques_final_size, self.opt['fixed_answers_len'])

        if 'ES_ocr' in self.opt and self.opt['ES_using_way'] == 'post_process':
            self.ES_linear = nn.Linear(multi2one_output_size, ocr_final_size)
            self.ES_ocr_att = Attention(ocr_final_size, opt['hidden_size'], correlation_func = 3, do_similarity = True)
            # elif self.opt['ES_using_way'] == 'as_ocr':

        log.debug('Network built successfully')
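In this variant the q_embedding and ocr_embedding options are comma-separated feature lists that decide which blocks contribute to ques_input_size and x_input_size. An illustrative (hypothetical) option fragment, with made-up dimensions:

    # Hypothetical configuration; key names mirror those read above.
    opt_fragment = {
        'q_embedding': 'glove,bert,pos',                       # features for question words
        'ocr_embedding': 'glove,fasttext,phoc,bert,pos,ent',   # features for OCR tokens
        'glove_dim': 300, 'fast_dim': 300, 'phoc_dim': 604,
        'pos_dim': 12, 'ent_dim': 8,
    }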
Example no. 5
0
    def __init__(self, opt, word_embedding):
        # word_embedding holds the GloVe vectors for the words in SDNet's
        # vocabulary; it is used to initialize the embedding layer weights
        super(SDNet, self).__init__()
        print('SDNet model\n')

        self.opt = opt
        set_dropout_prob(0. if 'DROPOUT' not in opt else float(
            opt['DROPOUT']))  # set the dropout rate
        set_seq_dropout('VARIATIONAL_DROPOUT' in self.opt)

        x_input_size = 0  # running total of feature dimensions for passage words (x)
        ques_input_size = 0  # running total of feature dimensions for question words (ques)

        self.vocab_size = int(opt['vocab_size'])  # vocabulary size
        vocab_dim = int(opt['vocab_dim'])  # GloVe embedding dimension
        self.vocab_embed = nn.Embedding(num_embeddings=self.vocab_size,
                                        embedding_dim=vocab_dim,
                                        padding_idx=1)
        self.vocab_embed.weight.data = word_embedding  # initialize embedding weights with the GloVe vectors

        x_input_size += vocab_dim
        ques_input_size += vocab_dim

        if 'CHAR_CNN' in self.opt:
            print('CHAR_CNN')
            char_vocab_size = int(opt['char_vocab_size'])
            char_dim = int(opt['char_emb_size'])
            char_hidden_size = int(opt['char_hidden_size'])
            self.char_embed = nn.Embedding(num_embeddings=char_vocab_size,
                                           embedding_dim=char_dim,
                                           padding_idx=1)
            self.char_cnn = CNN(input_size=char_dim,
                                window_size=3,
                                output_size=char_hidden_size)
            self.maxpooling = MaxPooling()
            x_input_size += char_hidden_size
            ques_input_size += char_hidden_size

        if 'TUNE_PARTIAL' in self.opt:
            print('TUNE_PARTIAL')
            self.fixed_embedding = word_embedding[opt['tune_partial']:]
        else:
            self.vocab_embed.weight.requires_grad = False

        cdim = 0
        self.use_contextual = False

        if 'BERT' in self.opt:
            print('Using BERT')
            self.Bert = Bert(self.opt)
            if 'LOCK_BERT' in self.opt:
                print('Lock BERT\'s weights')
                for p in self.Bert.parameters():  # freeze BERT weights so they are not updated
                    p.requires_grad = False
            if 'BERT_LARGE' in self.opt:
                print('BERT_LARGE')
                bert_dim = 1024
                bert_layers = 24
            else:
                bert_dim = 768
                bert_layers = 12

            print('BERT dim:', bert_dim, 'BERT_LAYERS:', bert_layers)

            if 'BERT_LINEAR_COMBINE' in self.opt:
                print('BERT_LINEAR_COMBINE')
                # a weighted sum over the encodings from every BERT layer
                # requires the learnable weights alpha and gamma
                self.alphaBERT = nn.Parameter(torch.Tensor(bert_layers),
                                              requires_grad=True)
                self.gammaBERT = nn.Parameter(torch.Tensor(1, 1),
                                              requires_grad=True)
                torch.nn.init.constant_(self.alphaBERT, 1.0)
                torch.nn.init.constant_(self.gammaBERT, 1.0)

            cdim = bert_dim
            x_input_size += bert_dim
            ques_input_size += bert_dim
        # Word-level attention layer
        self.pre_align = Attention(input_size=vocab_dim,
                                   hidden_size=opt['prealign_hidden'],
                                   correlation_func=3,
                                   do_similarity=True)
        x_input_size += vocab_dim
        # POS and named-entity tag embeddings
        pos_dim = opt['pos_dim']
        ent_dim = opt['ent_dim']
        self.pos_embedding = nn.Embedding(num_embeddings=len(POS),
                                          embedding_dim=pos_dim)
        self.ent_embedding = nn.Embedding(num_embeddings=len(ENT),
                                          embedding_dim=ent_dim)
        # 4-dimensional features for each passage word, including term frequency, exact match, etc.
        x_feat_len = 4
        if 'ANSWER_SPAN_IN_CONTEXT_FEATURE' in self.opt:
            print('ANSWER_SPAN_IN_CONTEXT_FEATURE')
            x_feat_len += 1

        x_input_size += pos_dim + ent_dim + x_feat_len

        print('Initially, the vector_sizes [doc, query] are', x_input_size,
              ques_input_size)

        additional_feat = cdim if self.use_contextual else 0

        # Passage RNN layer
        self.context_rnn, context_rnn_output_size = RNN_from_opt(
            input_size_=x_input_size,
            hidden_size_=opt['hidden_size'],
            num_layers=opt['in_rnn_layers'],
            concat_rnn=opt['concat_rnn'],
            add_feat=additional_feat)

        # Question RNN layer
        self.ques_rnn, ques_rnn_output_size = RNN_from_opt(
            input_size_=ques_input_size,
            hidden_size_=opt['hidden_size'],
            num_layers=opt['in_rnn_layers'],
            concat_rnn=opt['concat_rnn'],
            add_feat=additional_feat)
        # Output sizes of the RNN layers
        print('After Input LSTM, the vector_sizes [doc, query] are [',
              context_rnn_output_size, ques_rnn_output_size, '] * ',
              opt['in_rnn_layers'])

        # Fully-aware inter-attention
        self.deep_attn = DeepAttention(opt=opt,
                                       abstr_list_cnt=opt['in_rnn_layers'],
                                       deep_att_hidden_size_per_abstr=opt[
                                           'deep_att_hidden_size_per_abstr'],
                                       correlation_func=3,
                                       word_hidden_size=vocab_dim +
                                       additional_feat)
        self.deep_attn_input_size = self.deep_attn.rnn_input_size
        self.deep_attn_output_size = self.deep_attn.output_size

        # Question understanding layer
        self.high_lvl_ques_rnn, high_lvl_ques_rnn_output_size = RNN_from_opt(
            input_size_=ques_rnn_output_size * opt['in_rnn_layers'],
            hidden_size_=opt['highlvl_hidden_size'],
            num_layers=opt['question_high_lvl_rnn_layers'],
            concat_rnn=True)
        # Total dimension of each passage word's history-of-word vector
        self.after_deep_attn_size = self.deep_attn_output_size + self.deep_attn_input_size + additional_feat + vocab_dim
        self.self_attn_input_size = self.after_deep_attn_size
        self_attn_output_size = self.deep_attn_output_size

        # Passage-word self-attention layer
        self.highlvl_self_attn = Attention(
            input_size=self.self_attn_input_size,
            hidden_size=opt['deep_att_hidden_size_per_abstr'],
            correlation_func=3)
        print('Self deep-attention input is {}-dim'.format(
            self.self_attn_input_size))

        # High-level passage RNN layer
        self.high_lvl_context_rnn, high_lvl_context_rnn_output_size = RNN_from_opt(
            input_size_=self.deep_attn_output_size + self_attn_output_size,
            hidden_size_=opt['highlvl_hidden_size'],
            num_layers=1,
            concat_rnn=False)
        # Final dimension of passage words
        context_final_size = high_lvl_context_rnn_output_size

        # Question self-attention layer
        print('Do Question self attention')
        self.ques_self_attn = Attention(
            input_size=high_lvl_ques_rnn_output_size,
            hidden_size=opt['query_self_attn_hidden_size'],
            correlation_func=3)
        # Final dimension of question words
        ques_final_size = high_lvl_ques_rnn_output_size
        print('Before answer span finding, hidden size are',
              context_final_size, ques_final_size)

        # Linear self-attention layer that produces a single vector representation of the question
        self.ques_merger = LinearSelfAttn(input_size=ques_final_size)

        # Score output layer
        self.get_answer = GetFinalScores(x_size=context_final_size,
                                         h_size=ques_final_size)
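Finally, a hypothetical instantiation of the model defined above, showing how the GloVe matrix and the option keys read in the constructor fit together; all dimensions and hidden sizes are illustrative, not the project's actual defaults:

    import torch

    vocab_size, vocab_dim = 50000, 300
    word_embedding = torch.zeros(vocab_size, vocab_dim)   # would hold the GloVe vectors
    opt = {
        'vocab_size': vocab_size, 'vocab_dim': vocab_dim,
        'prealign_hidden': 300, 'pos_dim': 12, 'ent_dim': 8,
        'hidden_size': 125, 'in_rnn_layers': 2, 'concat_rnn': False,
        'deep_att_hidden_size_per_abstr': 250,
        'highlvl_hidden_size': 125, 'question_high_lvl_rnn_layers': 1,
        'query_self_attn_hidden_size': 300,
    }
    model = SDNet(opt, word_embedding)   # CHAR_CNN / BERT / TUNE_PARTIAL options omitted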