Example #1
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding
        # cinn = tf.keras.layers.Input(shape=(self.clen,), name='context_input')
        # qinn = tf.keras.layers.Input(shape=(self.qlen,), name='question_input')
        #
        # embedding_layer = tf.keras.layers.Embedding(self.max_features,
        #                                             self.emb_size,
        #                                             embeddings_initializer='uniform',
        #                                             )
        # cemb = embedding_layer(cinn)
        # qemb = embedding_layer(qinn)
        cinn = tf.keras.layers.Input(shape=(2, ), name='context_input')
        qinn = tf.keras.layers.Input(shape=(2, ), name='question_input')
        # Context: concatenate the GloVe vectors with the char-CNN features
        Glove_c_matrix = cinn[0]
        c_char_dict = cinn[1]
        # Convolve and pool each word's character embedding matrix
        cemb = np.zeros(Glove_c_matrix.shape)
        for key in c_char_dict:
            char_embedding_matrix = c_char_dict[key]
            # char_embedding_matrix is a 30x100 matrix
            char_embedding = tf.compat.v1.layers.conv1d(char_embedding_matrix, 5, 3)  # TF1-style layer
            # five 28x1 feature maps
            char_embedding = tf.compat.v1.layers.max_pooling1d(char_embedding,
                                                               pool_size=2,
                                                               strides=2)
            # merge the channels into one
            char_embedding = tf.concat(char_embedding, axis=0)
            # fuse the word's GloVe vector with its char features
            cemb[key] = tf.concat([Glove_c_matrix[key], char_embedding], axis=0)

        # Question: concatenate the GloVe vectors with the char-CNN features
        Glove_q_matrix = qinn[0]
        q_char_dict = qinn[1]
        # Convolve and pool each word's character embedding matrix
        qemb = np.zeros(Glove_q_matrix.shape)
        for key in q_char_dict:
            char_embedding_matrix = q_char_dict[key]
            # char_embedding_matrix is a 30x100 matrix
            char_embedding = tf.compat.v1.layers.conv1d(char_embedding_matrix, 5, 3)  # TF1-style layer
            char_embedding = tf.compat.v1.layers.max_pooling1d(char_embedding,
                                                               pool_size=2,
                                                               strides=2)
            # merge the channels into one
            char_embedding = tf.concat(char_embedding, axis=0)
            # fuse the word's GloVe vector with its char features
            qemb[key] = tf.concat([Glove_q_matrix[key], char_embedding], axis=0)

        for i in range(self.num_highway_layers):
            """
            Use two highway network layers
            """
            highway_layer = layers.Highway(name=f'Highway{i}')
            chighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'QHighway{i}')
            cemb = chighway(cemb)
            qemb = qhighway(qemb)

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.emb_size,
            recurrent_dropout=self.encoder_dropout,
            return_sequences=True,
            name='RNNEncoder'),
                                                      name='BiRNNEncoder')

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(self.emb_size,
                                     recurrent_dropout=self.decoder_dropout,
                                     return_sequences=True,
                                     name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [cinn, qinn]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
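
Example #1's char-CNN path mixes NumPy buffers and per-word Python loops with TF1-style layers, so it would not build as a Keras graph. Below is a minimal pure-tf.keras sketch of the same idea (per-word char embeddings, a Conv1D over the character axis, max-pooling to one vector per word); every size in it (max_chars, char_vocab, char_dim, n_filters, kernel_size) is an illustrative assumption, not a value from the project.

import tensorflow as tf

# Hedged sketch of a CNN char embedding in pure tf.keras; all sizes are
# illustrative assumptions, not values from the original project.
def char_cnn_embedding(seq_len, max_chars=30, char_vocab=100,
                       char_dim=20, n_filters=50, kernel_size=3):
    char_ids = tf.keras.layers.Input(shape=(seq_len, max_chars), name='char_ids')
    # (batch, seq_len, max_chars, char_dim)
    x = tf.keras.layers.Embedding(char_vocab, char_dim)(char_ids)
    # convolve over the characters of every word independently
    x = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Conv1D(n_filters, kernel_size, activation='relu'))(x)
    # max-pool over characters: one n_filters-dim vector per word
    x = tf.keras.layers.TimeDistributed(tf.keras.layers.GlobalMaxPooling1D())(x)
    return tf.keras.Model(char_ids, x)

# Usage: concatenate with the GloVe word vectors along the feature axis, e.g.
# cemb = tf.keras.layers.Concatenate(axis=-1)([glove_cemb, char_feats])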
Example #2
File: main.py Project: AILAN110/mrc
    def build_model(self):
        """
        Build the model.
        :return:
        """
        ################### model definition #######################
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding
        w_cinn = tf.keras.layers.Input(shape=(self.clen,), name='context_word_input')   # word ids; shape excludes batch_size
        w_qinn = tf.keras.layers.Input(shape=(self.qlen,), name='question_word_input')  # word ids

        # load the GloVe vectors
        vocab_size=123252   # vocabulary size
        embedding_matrix=preprocess.load_glove()
        word_embedding_layer=tf.keras.layers.Embedding(vocab_size,300,weights=[embedding_matrix],trainable=False)
        wc_emb=word_embedding_layer(w_cinn)
        wq_emb=word_embedding_layer(w_qinn)

        # Char CNN
        c_cinn = tf.keras.layers.Input(shape=(self.clen, 20), name='context_char_input')  # char ids
        c_qinn = tf.keras.layers.Input(shape=(self.qlen, 20), name='question_char_input')  # char ids
        char_embedding_layer=tf.keras.layers.Embedding(self.max_features,self.emb_size,embeddings_initializer='uniform')
        cc_emb=char_embedding_layer(c_cinn)
        cq_emb=char_embedding_layer(c_qinn)

        cc_emb=tf.reshape(cc_emb,shape=[-1,20,self.emb_size])
        cq_emb=tf.reshape(cq_emb,shape=[-1,20,self.emb_size])
        conv1d=tf.keras.layers.Conv1D(filters=6,kernel_size=4,padding='same',activation="relu")   #input_shape
        cc_emb=tf.transpose(cc_emb,perm=[0,2,1])
        cq_emb=tf.transpose(cq_emb,perm=[0,2,1])
        cc_emb=conv1d(cc_emb)   #[b*seq_len,6,xx]
        cq_emb=conv1d(cq_emb)
        # global max pooling
        cc_emb=tf.transpose(cc_emb,perm=[0,2,1])
        cq_emb=tf.transpose(cq_emb,perm=[0,2,1])
        max_pool_1d=tf.keras.layers.GlobalMaxPooling1D()
        cc_emb=tf.reshape(max_pool_1d(cc_emb),shape=[-1,self.clen,6])
        cq_emb=tf.reshape(max_pool_1d(cq_emb),shape=[-1,self.qlen,6])
        # concatenate word and char features
        cemb=tf.concat([wc_emb,cc_emb],axis=-1)
        qemb=tf.concat([wq_emb,cq_emb],axis=-1)
        # fully connected projection
        dense_1=tf.keras.layers.Dense(self.emb_size,activation=tf.keras.activations.softmax)
        cemb = dense_1(cemb)
        qemb = dense_1(qemb)
        # cinn = tf.keras.layers.Input(shape=(self.clen,), name='context_input')   # analogous to a placeholder
        # qinn = tf.keras.layers.Input(shape=(self.qlen,), name='question_input')

        # embedding_layer = tf.keras.layers.Embedding(self.max_features,
        #                                             self.emb_size,
        #                                             embeddings_initializer='uniform',
        #                                             )
        # cemb = embedding_layer(cinn)    # acts like tf.nn.embedding_lookup()
        # qemb = embedding_layer(qinn)    # functional style: each layer wraps the previous one's output

        for i in range(self.num_highway_layers):
            """
            使用两层高速神经网络
            """
            highway_layer = layers.Highway(name=f'Highway{i}')   #自定义网络:Layer
            chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
            cemb = chighway(cemb)    #输入进入
            qemb = qhighway(qemb)

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(
                self.emb_size,
                recurrent_dropout=self.encoder_dropout,
                return_sequences=True,
                name='RNNEncoder'
            ), name='BiRNNEncoder'
        )

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')  # similarity
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)   # code to be completed
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(
                    self.emb_size,
                    recurrent_dropout=self.decoder_dropout,
                    return_sequences=True,
                    name=f'RNNDecoder{i}'
                ), name=f'BiRNNDecoder{i}'
            )
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])  # final output

        # inn = [cinn, qinn]   # inputs
        inn = [c_cinn,w_cinn, c_qinn,w_qinn]   # inputs

        self.model = tf.keras.models.Model(inn, out)   # fixed inputs/outputs (fit data must match; out may be a list for multi-task), replaces Sequential
        self.model.summary(line_length=128)    # print per-layer parameter counts, similar to a TF 1.x summary
        ############### model compilation ######################
        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(
            optimizer=optimizer,   # optimizer
            loss=negative_avg_log_error,    # loss; multi-task training may pass a list and set loss_weights=[w1, w2, ...]
            metrics=[accuracy]   # evaluation metrics
        )
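
Every variant in this listing calls a custom layers.Highway that is not shown. A minimal sketch of such a highway layer (a transform branch gated against a carry of the input), assuming it preserves the feature dimension; the project's actual implementation may differ:

import tensorflow as tf

# Hedged sketch of the unshown layers.Highway used throughout this listing.
class Highway(tf.keras.layers.Layer):
    def build(self, input_shape):
        dim = int(input_shape[-1])
        # transform branch and gate, both preserving the feature dimension
        self.transform = tf.keras.layers.Dense(dim, activation='relu')
        self.gate = tf.keras.layers.Dense(dim, activation='sigmoid')

    def call(self, x):
        t = self.gate(x)  # gate in [0, 1]: how much to transform vs. carry
        return t * self.transform(x) + (1.0 - t) * x

# Wrapped per time step as in the examples:
# tf.keras.layers.TimeDistributed(Highway(name='Highway0'))(cemb)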
Example #3
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding
        # cinn_c = tf.keras.layers.Input(shape=(self.clen,self.max_char_len), name='context_input_char')
        # qinn_c = tf.keras.layers.Input(shape=(self.qlen,self.max_char_len), name='question_input_char')
        # embedding_layer_char = tf.keras.layers.Embedding(self.max_features, self.emb_size, embeddings_initializer='uniform')
        #
        # emb_cc = embedding_layer_char(cinn_c)
        # emb_qc = embedding_layer_char(qinn_c)
        #
        # c_conv_out = []
        # filter_sizes = sum(list(np.array(self.conv_layers).T[0]))
        # assert filter_sizes==self.emb_size
        # for filters, kernel_size in self.conv_layers:
        #     conv = tf.keras.layers.Conv2D(filters=filters,kernel_size=[kernel_size,self.emb_size],strides=1,activation='relu',padding='same')(emb_cc)
        #     conv = tf.reduce_max(conv, 2)
        #     c_conv_out.append(conv)
        # c_conv_out = tf.keras.layers.concatenate(c_conv_out)
        #
        # q_conv_out = []
        # for filters, kernel_size in self.conv_layers:
        #     conv = tf.keras.layers.Conv2D(filters=filters,kernel_size=[kernel_size,self.emb_size],strides=1,activation='relu',padding='same')(emb_qc)
        #     conv = tf.reduce_max(conv, 2)
        #     q_conv_out.append(conv)
        # q_conv_out = tf.keras.layers.concatenate(q_conv_out)

        cinn_w = tf.keras.layers.Input(shape=(self.clen,), name='context_input_word')
        qinn_w = tf.keras.layers.Input(shape=(self.qlen,), name='question_input_word')
        embedding_layer_word = tf.keras.layers.Embedding(self.vocab_size, self.emb_size, 
            embeddings_initializer=tf.constant_initializer(np.array(self.embedding_matrix)), trainable=False)

        emb_cw = embedding_layer_word(cinn_w)
        emb_qw = embedding_layer_word(qinn_w)
        print('emb_cw',emb_cw.shape)
        # cemb = tf.concat([emb_cw, c_conv_out], axis=2)
        # qemb = tf.concat([emb_qw, q_conv_out], axis=2)

        cemb = emb_cw
        qemb = emb_qw
        print('cemb',cemb.shape)
        for i in range(self.num_highway_layers):
            """
            Use two highway network layers
            """
            highway_layer = layers.Highway(name=f'Highway{i}')
            chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
            cemb = chighway(cemb)
            qemb = qhighway(qemb)

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(
                self.emb_size,
                recurrent_dropout=self.encoder_dropout,
                return_sequences=True,
                name='RNNEncoder'
            ), name='BiRNNEncoder'
        )

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(
                    self.emb_size,
                    recurrent_dropout=self.decoder_dropout,
                    return_sequences=True,
                    name=f'RNNDecoder{i}'
                ), name=f'BiRNNDecoder{i}'
            )
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [ cinn_w, qinn_w]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(
            optimizer=optimizer,
            loss=negative_avg_log_error,
            metrics=[accuracy]
        )
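
Examples #2 and #3 assume a prebuilt GloVe matrix (preprocess.load_glove(), self.embedding_matrix) without showing its construction. A minimal sketch of building one from a standard glove.*.txt file; the path, dimension, and word_index mapping are assumptions for illustration:

import numpy as np

# Hedged sketch: build an embedding matrix aligned with a tokenizer's
# word -> id mapping. Path and dimensions are illustrative assumptions.
def load_glove(word_index, path='glove.6B.300d.txt', dim=300):
    vectors = {}
    with open(path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            vectors[parts[0]] = np.asarray(parts[1:], dtype='float32')
    matrix = np.zeros((len(word_index) + 1, dim), dtype='float32')  # row 0: padding
    for word, idx in word_index.items():
        vec = vectors.get(word)
        if vec is not None:
            matrix[idx] = vec  # words without a GloVe vector stay all-zero
    return matrix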
Example #4
    def build_model(self):
        cinn = tf.keras.layers.Input(shape=(self.clen, ), name='CInn')
        qinn = tf.keras.layers.Input(shape=(self.qlen, ), name='QInn')

        embedding_layer = tf.keras.layers.Embedding(self.max_features,
                                                    self.emb_size)
        cemb = embedding_layer(cinn)
        qemb = embedding_layer(qinn)

        for i in range(self.num_highway_layers):
            highway_layer = layers.Highway(name=f'Highway{i}')
            chighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'QHighway{i}')

            cemb = chighway(cemb)
            qemb = qhighway(qemb)

        encoder_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.emb_size,
            recurrent_dropout=self.encoder_dropout,
            return_sequences=True,
            name='RNNEncoder'),
                                                      name='BiRNNEncoder')

        cencode = encoder_layer(cemb)
        qencode = encoder_layer(qemb)

        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(self.emb_size,
                                     recurrent_dropout=self.decoder_dropout,
                                     return_sequences=True,
                                     name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [cinn, qinn]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
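
Every compile call references a custom negative_avg_log_error loss and accuracy metric that the listing never defines. A hedged sketch consistent with BiDAF's training objective (negative mean log-probability of the true span), under the assumed conventions that y_true holds integer (start, end) indices and y_pred stacks the begin/end distributions as (batch, seq_len, 2):

import tensorflow as tf

# Hedged sketches of the custom loss/metric; shape conventions are assumptions.
def negative_avg_log_error(y_true, y_pred):
    # y_true: (batch, 2) int start/end indices
    # y_pred: (batch, seq_len, 2) begin/end probability distributions
    idx = tf.cast(y_true, tf.int32)
    p_begin = tf.gather(y_pred[..., 0], idx[:, 0], batch_dims=1)
    p_end = tf.gather(y_pred[..., 1], idx[:, 1], batch_dims=1)
    # negative mean log-likelihood of the true (start, end) span
    return -tf.reduce_mean(tf.math.log(p_begin + 1e-8) + tf.math.log(p_end + 1e-8))

def accuracy(y_true, y_pred):
    # exact match of both predicted indices against the gold span
    pred = tf.cast(tf.argmax(y_pred, axis=1), tf.int32)      # (batch, 2)
    match = tf.reduce_all(tf.equal(pred, tf.cast(y_true, tf.int32)), axis=-1)
    return tf.reduce_mean(tf.cast(match, tf.float32))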
Example #5
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding
        '''
        layers.Input(
                shape=None,
                batch_size=None,
                name=None,
                dtype=None,
                sparse=False,
                tensor=None,
                ragged=False,
                **kwargs,
                )
        # Word Embedding Layer
        # initialize the word embedding with GloVe
        '''
        cinn_c = tf.keras.layers.Input(shape=(self.clen, self.max_char_len), name='context_input_char')
        qinn_c = tf.keras.layers.Input(shape=(self.qlen, self.max_char_len), name='question_input_char')
        embedding_layer_char = tf.keras.layers.Embedding(self.max_features,  # maximum vocabulary size
                                                         self.emb_size,  # embedding dimension
                                                         embeddings_initializer='uniform',
                                                         )
        '''
        input_dim      vocabulary size (number of distinct tokens)
        output_dim     dimension of the embedding space
        input_length   length of the input sequences
        embeddings_initializer
        embeddings_regularizer
        embeddings_constraint
        mask_zero

        Input shape:  2-D tensor (batch_size, input_length)
        Output shape: 3-D tensor (batch_size, input_length, output_dim)
        '''
        # run the embedding to get emb_cc and emb_cq
        # Char Embedding Layer: map each word into vector space with character-level CNNs
        emb_cc = embedding_layer_char(cinn_c)
        emb_cq = embedding_layer_char(qinn_c)

        # convolve and pool c and q separately
        c_conv_out = []
        q_conv_out = []

        filter_sizes = sum(list(np.array(self.conv_layers).T[0]))
        assert filter_sizes == self.emb_size

        # Convolution
        # filters: number of convolution filters, i.e. the output dimension
        # kernel_size: filter size; a single integer means equal width and height
        # strides: horizontal and vertical stride; a single integer means both are equal
        # activation: activation function; None means linear
        # padding: 'same' zero-pads blocks smaller than the kernel, so output and input shapes match ('valid' drops them)
        for filters, kernel_size in self.conv_layers:
            conv = tf.keras.layers.Conv2D(filters=filters, kernel_size=[kernel_size, self.emb_size], strides=1,
                                          activation='relu',padding='same')(emb_cc)
            conv = tf.reduce_max(conv, 2)  # pooling
            c_conv_out.append(conv)

            conv = tf.keras.layers.Conv2D(filters=filters, kernel_size=[kernel_size, self.emb_size], strides=1,
                                        activation='relu',padding='same')(emb_cq)
            conv = tf.reduce_max(conv, 2)  # pooling
            q_conv_out.append(conv)
        c_conv_out = tf.keras.layers.concatenate(c_conv_out)
        q_conv_out = tf.keras.layers.concatenate(q_conv_out)

        cinn_w = tf.keras.layers.Input(shape=(self.clen,), name='context_input_word')
        qinn_w = tf.keras.layers.Input(shape=(self.qlen,), name='question_input_word')

        # Word Embedding Layer: map each word into vector space with a pretrained word embedding.
        embedding_layer_word = tf.keras.layers.Embedding(
            self.vocab_size,    # vocabulary size here
            self.emb_size,
            embeddings_initializer=tf.constant_initializer(self.embedding_matrix), trainable=False
        )

        emb_cw = embedding_layer_word(cinn_w)
        emb_qw = embedding_layer_word(qinn_w)

        cemb = tf.concat([emb_cw, c_conv_out], axis = 2)  # tf.concat joins arrays along a given axis, yielding the full 100-dim features
        qemb = tf.concat([emb_qw, q_conv_out], axis = 2)

        for i in range(self.num_highway_layers):  # number of highway layers: 2
            """
            Use two highway network layers
            """
            highway_layer = layers.Highway(name=f'Highway{i}')
            chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
            cemb = chighway(cemb)
            qemb = qhighway(qemb)

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(
                self.emb_size,
                recurrent_dropout=self.encoder_dropout,
                return_sequences=True,
                name='RNNEncoder'
            ), name='BiRNNEncoder'
        )

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer: combine the query and context vectors so every context word gets a query-aware representation.
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention') # context-to-query attention
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention') # query-to-context attention

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer: scan the context with an RNN
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(
                    self.emb_size,
                    recurrent_dropout=self.decoder_dropout,
                    return_sequences=True,
                    name=f'RNNDecoder{i}'
                ), name=f'BiRNNDecoder{i}'
            )
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [cinn_c, cinn_w, qinn_c, qinn_w]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,loss=negative_avg_log_error,metrics=[accuracy])
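
The layers.Similarity layer is also external to this listing. In the BiDAF paper it is the trilinear similarity S_tj = w^T [c_t; q_j; c_t * q_j]; a minimal sketch of that formulation, with the project's layer assumed equivalent:

import tensorflow as tf

# Hedged sketch of the Similarity layer: BiDAF's trilinear similarity.
class Similarity(tf.keras.layers.Layer):
    def build(self, input_shape):
        d = int(input_shape[0][-1])
        self.w = self.add_weight(name='w', shape=(3 * d, 1),
                                 initializer='glorot_uniform')

    def call(self, inputs):
        c, q = inputs                                           # (B, T, d), (B, J, d)
        t, j = tf.shape(c)[1], tf.shape(q)[1]
        c_tiled = tf.tile(tf.expand_dims(c, 2), [1, 1, j, 1])   # (B, T, J, d)
        q_tiled = tf.tile(tf.expand_dims(q, 1), [1, t, 1, 1])   # (B, T, J, d)
        features = tf.concat([c_tiled, q_tiled, c_tiled * q_tiled], axis=-1)
        return tf.squeeze(tf.matmul(features, self.w), axis=-1)  # (B, T, J)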
Example #6
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding
        ############ added: load the pretrained embedding matrix
        embedding_matrix = load_embedding_matrix()

        cinn = tf.keras.layers.Input(shape=(self.clen, ), name='context_input')
        qinn = tf.keras.layers.Input(shape=(self.qlen, ),
                                     name='question_input')

        ############ added: word embedding
        word_embedding = tf.keras.layers.Embedding(
            self.max_features,
            self.emb_size,
            weights=[embedding_matrix],
            trainable=False,
        )
        ############ added: CNN char embedding
        # padding='same' keeps the sequence length so the concatenation below works
        cnn_char_embedding = tf.keras.layers.Conv1D(self.max_features,
                                                    5,
                                                    padding='same',
                                                    activation='tanh',
                                                    trainable=True)
        # cemb = embedding_layer(cinn)
        # qemb = embedding_layer(qinn)
        ############ added: combine the two embeddings
        # layers must be applied to tensors before concatenating; with no
        # separate char-id input here, the conv runs over the word embeddings
        c_w_emb = word_embedding(cinn)
        q_w_emb = word_embedding(qinn)
        cemb = tf.keras.layers.Concatenate(axis=-1)(
            [c_w_emb, cnn_char_embedding(c_w_emb)])
        qemb = tf.keras.layers.Concatenate(axis=-1)(
            [q_w_emb, cnn_char_embedding(q_w_emb)])

        for i in range(self.num_highway_layers):
            """
            Use two highway network layers
            """
            highway_layer = layers.Highway(name=f'Highway{i}')
            chighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'QHighway{i}')
            cemb = chighway(cemb)
            qemb = qhighway(qemb)

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.emb_size,
            recurrent_dropout=self.encoder_dropout,
            return_sequences=True,
            name='RNNEncoder'),
                                                      name='BiRNNEncoder')

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(self.emb_size,
                                     recurrent_dropout=self.decoder_dropout,
                                     return_sequences=True,
                                     name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [cinn, qinn]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
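
layers.C2QAttention and layers.Q2CAttention are likewise unshown. Hedged sketches of the two attention directions as defined in the BiDAF paper: context-to-query attends over question words per context position; query-to-context weights context positions by their best-matching question word and tiles the attended vector over time. The project's layers are assumed equivalent:

import tensorflow as tf

# Hedged sketches of BiDAF's two attention directions.
class C2QAttention(tf.keras.layers.Layer):
    def call(self, similarity, qencode):
        # attend over the question for every context position
        a = tf.nn.softmax(similarity, axis=-1)          # (B, T, J)
        return tf.matmul(a, qencode)                    # (B, T, d)

class Q2CAttention(tf.keras.layers.Layer):
    def call(self, similarity, cencode):
        # one weight per context position, from its best-matching question word
        b = tf.nn.softmax(tf.reduce_max(similarity, axis=-1), axis=-1)  # (B, T)
        h = tf.matmul(tf.expand_dims(b, 1), cencode)    # (B, 1, d)
        return tf.tile(h, [1, tf.shape(cencode)[1], 1])  # (B, T, d)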
Example #7
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding

        # define the char-level and word-level context/question inputs
        cemb = tf.keras.layers.Input(shape=(self.word_clen,
                                            self.word_emb_size),
                                     name='word_context_input')
        qemb = tf.keras.layers.Input(shape=(self.word_qlen,
                                            self.word_emb_size),
                                     name='word_question_input')

        # # word-vector embedding layer
        # word_embedding_layer = tf.keras.layers.Embedding(self.max_word_features, self.word_emb_size, weights=[self.glove_w2vec_matrix])
        # # char-level embedding layer
        # char_embedding_layer = tf.keras.layers.Embedding(self.max_char_features,
        #                                             self.char_emb_size,
        #                                             embeddings_initializer='uniform',
        #                                             )
        # # feed the inputs through each layer
        # char_cemb = char_embedding_layer(char_cinn)
        # char_qemb = char_embedding_layer(char_qinn)
        # word_cemb = word_embedding_layer(word_cinn)
        # word_qemb = word_embedding_layer(word_qinn)

        print(cemb.shape)
        print(qemb.shape)
        # print('cemb{} = []'.format(self.num_highway_layers))
        # exec('cemb{} = []'.format(self.num_highway_layers))
        # print(cemb2)
        # exec('qemb{} = []'.format(self.num_highway_layers))
        # chighway_inputs = []
        # qhighway_inputs = []
        # chighway_inputs.append(cemb)
        # qhighway_inputs.append(qemb)

        highway_layer0 = layers.Highway(name='Highway0')
        chighway0 = tf.keras.layers.TimeDistributed(highway_layer0,
                                                    name='CHighway0')
        qhighway0 = tf.keras.layers.TimeDistributed(highway_layer0,
                                                    name='QHighway0')
        cemb1 = chighway0(cemb)
        qemb1 = qhighway0(qemb)

        highway_layer1 = layers.Highway(name='Highway1')
        chighway1 = tf.keras.layers.TimeDistributed(highway_layer1,
                                                    name='CHighway1')
        qhighway1 = tf.keras.layers.TimeDistributed(highway_layer1,
                                                    name='QHighway1')
        cemb2 = chighway1(cemb1)
        qemb2 = qhighway1(qemb1)

        # for i in range(self.num_highway_layers):
        #     """
        #     Use two highway network layers
        #     """
        #     highway_layer = layers.Highway(name=f'Highway{i}')
        #     chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        #     qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        #     chighway_inputs.append(chighway(chighway_inputs[i]))
        #     qhighway_inputs.append(qhighway(qhighway_inputs[i]))

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.word_emb_size,
            recurrent_dropout=self.encoder_dropout,
            return_sequences=True,
            name='RNNEncoder'),
                                                      name='BiRNNEncoder')
        # cemb_highway = chighway_inputs[-1]
        # qemb_highway = qhighway_inputs[-1]
        cencode = encoder_layer(cemb2)  # encode the context
        qencode = encoder_layer(qemb2)  # encode the question

        # cencode = encoder_layer(exec('cemb{}'.format(self.num_highway_layers)))  # encode the context
        # qencode = encoder_layer(exec('qemb{}'.format(self.num_highway_layers)))  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(self.word_emb_size,
                                     recurrent_dropout=self.decoder_dropout,
                                     return_sequences=True,
                                     name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        # inn = [char_cinn, word_cinn, char_qinn, word_qinn]
        inn = [cemb, qemb]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
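
The layers.MergedContext step corresponds to the query-aware representation G of the BiDAF paper, G_t = [h_t; c2q_t; h_t * c2q_t; h_t * q2c_t]. A minimal sketch under that assumption (output width 8d for d-dimensional BiLSTM states); the project's layer may differ in detail:

import tensorflow as tf

# Hedged sketch of MergedContext following the BiDAF paper's G tensor.
class MergedContext(tf.keras.layers.Layer):
    def call(self, cencode, c2q_att, q2c_att):
        # (batch, T, 8d): [h; c2q; h*c2q; h*q2c]
        return tf.concat(
            [cencode, c2q_att, cencode * c2q_att, cencode * q2c_att], axis=-1)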
Example #8
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # define the char-level and word-level context/question inputs
        cemb = tf.keras.layers.Input(shape=(self.word_clen,
                                            self.word_emb_size),
                                     name='word_context_input')
        qemb = tf.keras.layers.Input(shape=(self.word_qlen,
                                            self.word_emb_size),
                                     name='word_question_input')

        highway_layer0 = layers.Highway(name='Highway0')
        chighway0 = tf.keras.layers.TimeDistributed(highway_layer0,
                                                    name='CHighway0')
        qhighway0 = tf.keras.layers.TimeDistributed(highway_layer0,
                                                    name='QHighway0')
        cemb1 = chighway0(cemb)
        qemb1 = qhighway0(qemb)

        highway_layer1 = layers.Highway(name='Highway1')
        chighway1 = tf.keras.layers.TimeDistributed(highway_layer1,
                                                    name='CHighway1')
        qhighway1 = tf.keras.layers.TimeDistributed(highway_layer1,
                                                    name='QHighway1')
        cemb2 = chighway1(cemb1)
        qemb2 = qhighway1(qemb1)

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.word_emb_size,
            recurrent_dropout=self.encoder_dropout,
            return_sequences=True,
            name='RNNEncoder'),
                                                      name='BiRNNEncoder')

        # exec() returns None, so it cannot supply the encoder inputs; use the
        # highway outputs directly, as in Example #7
        cencode = encoder_layer(cemb2)  # encode the context
        qencode = encoder_layer(qemb2)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(self.word_emb_size,
                                     recurrent_dropout=self.decoder_dropout,
                                     return_sequences=True,
                                     name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        # inn = [char_cinn, word_cinn, char_qinn, word_qinn]
        inn = [cemb, qemb]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
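
layers.SpanBegin and layers.SpanEnd are the remaining custom output layers. Hedged sketches following the BiDAF paper: the begin distribution is a linear projection of [G; M] softmaxed over time; the end distribution runs one more BiLSTM over M first. They accept the call signatures used above, although the paper's formulation does not use cencode or span_begin_prob directly, and the hidden size is an assumption:

import tensorflow as tf

# Hedged sketches of the span output layers (BiDAF paper formulation;
# the project's layers may differ in detail).
class SpanBegin(tf.keras.layers.Layer):
    def build(self, input_shape):
        self.dense = tf.keras.layers.Dense(1)

    def call(self, inputs):
        merged_ctx, modeled_ctx = inputs
        x = tf.concat([merged_ctx, modeled_ctx], axis=-1)   # (B, T, 10d)
        logits = tf.squeeze(self.dense(x), axis=-1)         # (B, T)
        return tf.nn.softmax(logits)                        # begin distribution

class SpanEnd(tf.keras.layers.Layer):
    def __init__(self, units=100, **kwargs):
        super().__init__(**kwargs)
        self.rnn = tf.keras.layers.Bidirectional(           # units is an assumed size
            tf.keras.layers.LSTM(units, return_sequences=True))
        self.dense = tf.keras.layers.Dense(1)

    def call(self, inputs):
        # cencode and span_begin_prob are accepted to match the call site;
        # the paper's end predictor does not use them directly
        cencode, merged_ctx, modeled_ctx, span_begin_prob = inputs
        m2 = self.rnn(modeled_ctx)                          # second modeling pass
        logits = tf.squeeze(self.dense(tf.concat([merged_ctx, m2], axis=-1)), axis=-1)
        return tf.nn.softmax(logits)                        # end distribution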
Example #9
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding

        # define the char-level and word-level context/question inputs
        char_cinn = tf.keras.layers.Input(shape=(
            self.word_clen,
            self.char_clen,
        ),
                                          name='char_context_input')
        char_qinn = tf.keras.layers.Input(shape=(
            self.word_qlen,
            self.char_qlen,
        ),
                                          name='char_question_input')
        word_cinn = tf.keras.layers.Input(shape=(self.word_clen, ),
                                          name='word_context_input')
        word_qinn = tf.keras.layers.Input(shape=(self.word_qlen, ),
                                          name='word_question_input')

        # word-vector embedding layer
        word_embedding_layer = tf.keras.layers.Embedding(
            self.max_word_features,
            self.word_emb_size,
            weights=[self.glove_w2vec_matrix])
        # char-level embedding layer
        char_embedding_layer = tf.keras.layers.Embedding(
            self.max_char_features,
            self.char_emb_size,
            embeddings_initializer='uniform',
        )
        # feed the inputs through each layer
        char_cemb = char_embedding_layer(char_cinn)
        char_qemb = char_embedding_layer(char_qinn)
        word_cemb = word_embedding_layer(word_cinn)
        word_qemb = word_embedding_layer(word_qinn)

        # run the context char embeddings through the CNN to get char-level features
        char_c_convolution_output = []
        for num_filters, filter_width in self.conv_layers:
            conv = tf.keras.layers.Conv1D(filters=num_filters,
                                          kernel_size=filter_width,
                                          activation='relu',
                                          name='Conv1D_C_{}_{}'.format(
                                              num_filters,
                                              filter_width))(char_cemb)
            # print(conv.shape)
            pool = tf.keras.layers.MaxPool2D(
                data_format='channels_first',
                pool_size=(conv.shape[2], 1),
                name='MaxPoolingOverTime_C_{}_{}'.format(
                    num_filters, filter_width))(conv)
            # print(pool.shape)
            char_c_convolution_output.append(pool)

        char_cemb = tf.keras.layers.concatenate(char_c_convolution_output,
                                                axis=-1)
        char_cemb = tf.squeeze(char_cemb, axis=2)

        # run the question char embeddings through the CNN to get char-level features
        char_q_convolution_output = []
        for num_filters, filter_width in self.conv_layers:
            conv = tf.keras.layers.Convolution1D(filters=num_filters,
                                                 kernel_size=filter_width,
                                                 activation='relu',
                                                 name='Conv1D_Q_{}_{}'.format(
                                                     num_filters,
                                                     filter_width))(char_qemb)
            pool = tf.keras.layers.MaxPool2D(
                data_format='channels_first',
                pool_size=(conv.shape[2], 1),
                name='MaxPoolingOverTime_Q_{}_{}'.format(
                    num_filters, filter_width))(conv)
            char_q_convolution_output.append(pool)

        char_qemb = tf.keras.layers.concatenate(char_q_convolution_output,
                                                axis=-1)
        char_qemb = tf.squeeze(char_qemb, axis=2)

        # concatenate the word-level and char-level features
        cemb = tf.keras.layers.concatenate([word_cemb, char_cemb])
        qemb = tf.keras.layers.concatenate([word_qemb, char_qemb])
        print(cemb.shape)
        print(qemb.shape)
        for i in range(self.num_highway_layers):
            """
            Use two highway network layers
            """
            highway_layer = layers.Highway(name=f'Highway{i}')
            chighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'QHighway{i}')
            cemb = chighway(cemb)
            qemb = qhighway(qemb)

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(
                130,
                # self.word_emb_size,
                recurrent_dropout=self.encoder_dropout,
                return_sequences=True,
                name='RNNEncoder'),
            name='BiRNNEncoder')

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(
                    130,
                    # self.word_emb_size,
                    recurrent_dropout=self.decoder_dropout,
                    return_sequences=True,
                    name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [char_cinn, word_cinn, char_qinn, word_qinn]
        # inn = [char_cinn, char_qinn]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
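
The last custom piece, layers.Combine, only has to merge the two span distributions into one output tensor. A hedged sketch consistent with the (batch, seq_len, 2) convention assumed by the loss and metric sketched after Example #4:

import tensorflow as tf

# Hedged sketch of the Combine output layer; the stacking convention is
# an assumption matching the loss/metric sketch after Example #4.
class Combine(tf.keras.layers.Layer):
    def call(self, inputs):
        span_begin_prob, span_end_prob = inputs        # each (batch, seq_len)
        return tf.stack([span_begin_prob, span_end_prob], axis=-1)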
Example #10
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        cemb = tf.keras.layers.Input(shape=(self.clen, 768),
                                     name='word_context_input')
        qemb = tf.keras.layers.Input(shape=(self.qlen, 768),
                                     name='word_question_input')

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.emb_size,
            recurrent_dropout=self.encoder_dropout,
            return_sequences=True,
            name='RNNEncoder'),
                                                      name='BiRNNEncoder')

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(self.emb_size,
                                     recurrent_dropout=self.decoder_dropout,
                                     return_sequences=True,
                                     name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [cemb, qemb]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
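
Example #10 feeds precomputed 768-dimensional vectors (the hidden size of BERT-base) instead of token ids. A hedged sketch of producing such features with the huggingface transformers library; the checkpoint name is an illustrative assumption, not something the listing specifies:

import tensorflow as tf
from transformers import BertTokenizer, TFBertModel

# Hedged sketch: precompute the (seq_len, 768) features Example #10 expects.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert = TFBertModel.from_pretrained('bert-base-uncased')

def encode(text, max_len):
    enc = tokenizer(text, padding='max_length', truncation=True,
                    max_length=max_len, return_tensors='tf')
    # last_hidden_state: (1, max_len, 768); drop the batch dimension
    return bert(enc).last_hidden_state[0]

# context_feats = encode(context_text, clen)
# question_feats = encode(question_text, qlen)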
Example #11
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding

        word_embedding_layer = tf.keras.layers.Embedding(
            self.word_vocab_size,
            self.emb_size,
            weights=[self.glove_weight],
            trainable=False)

        char_embedding_layer = tf.keras.layers.Embedding(
            self.char_vocab_size,
            self.emb_size,
            embeddings_initializer='uniform')
        # char
        # (None, 30, 10)
        cinn_char = tf.keras.layers.Input(shape=(
            self.clen,
            max_char_len,
        ),
                                          name='context_input_char')
        qinn_char = tf.keras.layers.Input(shape=(
            self.qlen,
            max_char_len,
        ),
                                          name='question_input_char')

        # word
        # (None, 30)
        cinn_word = tf.keras.layers.Input(shape=(self.clen, ),
                                          name='context_input_word')
        qinn_word = tf.keras.layers.Input(shape=(self.qlen, ),
                                          name='question_input_word')

        # word
        # (None, 30, 50)
        cemb = word_embedding_layer(cinn_word)
        # (None, 30, 50)
        qemb = word_embedding_layer(qinn_word)

        # char feature
        # (None, 30, 10, 50)
        c_char_emb = char_embedding_layer(cinn_char)
        # (None, 30, 10, 50)
        q_char_emb = char_embedding_layer(qinn_char)

        # (None, 30, 6)
        cemb_c = self.multi_conv1d(c_char_emb)
        qemb_q = self.multi_conv1d(q_char_emb)

        # (None, 30, 56)
        cemb = tf.concat([cemb, cemb_c], axis=2)
        qemb = tf.concat([qemb, qemb_q], axis=2)

        for i in range(self.num_highway_layers):
            """
            Use two highway network layers
            """
            highway_layer = layers.Highway(name=f'Highway{i}')
            chighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'QHighway{i}')
            cemb = chighway(cemb)
            qemb = qhighway(qemb)

        # 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.emb_size,
            recurrent_dropout=self.encoder_dropout,
            return_sequences=True,
            name='RNNEncoder'),
                                                      name='BiRNNEncoder')

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(self.emb_size,
                                     recurrent_dropout=self.decoder_dropout,
                                     return_sequences=True,
                                     name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [cinn_word, qinn_word, cinn_char, qinn_char]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
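
Example #11 relies on a self.multi_conv1d helper that is not shown. A hedged sketch of what such a helper typically does in BiDAF implementations (several kernel widths over the character axis, max-pool each, concatenate); the conv_layers settings are assumptions chosen so the defaults reproduce the (None, 30, 6) shape annotated in the example:

import tensorflow as tf

# Hedged sketch of the unshown multi_conv1d helper from Example #11.
# conv_layers holds (filters, width) pairs; values are illustrative.
def multi_conv1d(char_emb, conv_layers=((2, 2), (2, 3), (2, 4))):
    # char_emb: (batch, seq_len, max_char_len, char_dim)
    outputs = []
    for filters, width in conv_layers:
        x = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Conv1D(filters, width, activation='relu'))(char_emb)
        x = tf.keras.layers.TimeDistributed(
            tf.keras.layers.GlobalMaxPooling1D())(x)   # (batch, seq_len, filters)
        outputs.append(x)
    # (batch, seq_len, sum of filters): 6 with the defaults above
    return tf.keras.layers.concatenate(outputs, axis=-1)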
Example #12
    def build_model(self):
        """
        Build the model.
        :return:
        """
        # 1. Embedding layer
        # TODO (homework): use GloVe word embeddings (or self-trained w2v) and a CNN char embedding
        cinn = tf.keras.layers.Input(shape=(self.clen, ), name='context_input')
        qinn = tf.keras.layers.Input(shape=(self.qlen, ),
                                     name='question_input')

        # embedding_layer = tf.keras.layers.Embedding(self.max_features,
        #                                             self.emb_size,
        #                                             embeddings_initializer='uniform')

        # word embedding layer
        word_embed_layer = tf.keras.layers.Embedding(
            self.vocab_size,
            self.word_embedding_dim,
            weights=[self.embedding_matrix],
            trainable=False)

        c_w_emb = word_embed_layer(cinn)
        q_w_emb = word_embed_layer(qinn)

        # char embedding layer
        c_embed_layer = tf.keras.layers.Embedding(
            self.max_features, self.emb_size, embeddings_initializer='uniform')
        filter_sizes = list(map(int, config.out_channel_dims.split(',')))
        heights = list(map(int, config.filter_heights.split(',')))
        dco = config.char_out_size
        assert sum(filter_sizes) == dco, (filter_sizes, dco)
        with tf.variable_scope("conv"):
            xx = multi_conv1d(Acx,
                              filter_sizes,
                              heights,
                              "VALID",
                              self.is_train,
                              config.keep_prob,
                              scope="xx")
            if config.share_cnn_weights:
                tf.get_variable_scope().reuse_variables()
                qq = multi_conv1d(Acq,
                                  filter_sizes,
                                  heights,
                                  "VALID",
                                  self.is_train,
                                  config.keep_prob,
                                  scope="xx")
            else:
                qq = multi_conv1d(Acq,
                                  filter_sizes,
                                  heights,
                                  "VALID",
                                  self.is_train,
                                  config.keep_prob,
                                  scope="qq")
                xx = tf.reshape(xx, [-1, M, JX, dco])
                qq = tf.reshape(qq, [-1, JQ, dco])

        for i in range(self.num_highway_layers):
            """
            Use two highway network layers
            """
            highway_layer = layers.Highway(name=f'Highway{i}')
            chighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'CHighway{i}')
            qhighway = tf.keras.layers.TimeDistributed(highway_layer,
                                                       name=f'QHighway{i}')
            cemb = chighway(cemb)
            qemb = qhighway(qemb)

        ## 2. Contextual embedding layer
        # Encoder: bidirectional LSTM
        encoder_layer = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.emb_size,
            recurrent_dropout=self.encoder_dropout,
            return_sequences=True,
            name='RNNEncoder'),
                                                      name='BiRNNEncoder')

        cencode = encoder_layer(cemb)  # encode the context
        qencode = encoder_layer(qemb)  # encode the question

        # 3. Attention flow layer
        similarity_layer = layers.Similarity(name='SimilarityLayer')
        similarity_matrix = similarity_layer([cencode, qencode])

        c2q_att_layer = layers.C2QAttention(name='C2QAttention')
        q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')

        c2q_att = c2q_att_layer(similarity_matrix, qencode)
        q2c_att = q2c_att_layer(similarity_matrix, cencode)

        # generate the merged context vectors
        merged_ctx_layer = layers.MergedContext(name='MergedContext')
        merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

        # 4. Modeling layer
        modeled_ctx = merged_ctx
        for i in range(self.num_decoders):
            decoder_layer = tf.keras.layers.Bidirectional(
                tf.keras.layers.LSTM(self.emb_size,
                                     recurrent_dropout=self.decoder_dropout,
                                     return_sequences=True,
                                     name=f'RNNDecoder{i}'),
                name=f'BiRNNDecoder{i}')
            modeled_ctx = decoder_layer(modeled_ctx)

        # 5. Output layer
        span_begin_layer = layers.SpanBegin(name='SpanBegin')
        span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])

        span_end_layer = layers.SpanEnd(name='SpanEnd')
        span_end_prob = span_end_layer(
            [cencode, merged_ctx, modeled_ctx, span_begin_prob])

        output_layer = layers.Combine(name='CombineOutputs')
        out = output_layer([span_begin_prob, span_end_prob])

        inn = [cinn, qinn]

        self.model = tf.keras.models.Model(inn, out)
        self.model.summary(line_length=128)

        optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
        self.model.compile(optimizer=optimizer,
                           loss=negative_avg_log_error,
                           metrics=[accuracy])
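
All twelve variants end with the same compile call. For completeness, a hedged sketch of training one of the two-input variants (e.g. Example #4); the array names, shapes, and hyperparameters are illustrative assumptions, with y following the (start, end) index convention of the loss sketched after Example #4:

# Hedged usage sketch -- names, shapes, and hyperparameters are assumptions.
# After build_model(), for a two-input variant such as Example #4:
#   c_ids: (n, clen) int32 context ids; q_ids: (n, qlen) int32 question ids
#   y:     (n, 2) int32 (start, end) answer-span indices
self.model.fit([c_ids, q_ids], y, batch_size=32, epochs=5, validation_split=0.1)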