def __init__(self, gen_emb, domain_emb, num_classes=3, dropout=0.5, crf=False):
    super(Model_cnn, self).__init__()
    self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0], gen_emb.shape[1])
    self.gen_embedding.weight = torch.nn.Parameter(torch.from_numpy(gen_emb), requires_grad=False)
    self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0], domain_emb.shape[1])
    self.domain_embedding.weight = torch.nn.Parameter(torch.from_numpy(domain_emb), requires_grad=False)
    self.conv1 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 128, 5, padding=2)
    self.conv2 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 128, 3, padding=1)
    # self.conv7 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 64, 7, padding=3)
    # self.conv9 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 64, 9, padding=4)
    self.dropout = torch.nn.Dropout(dropout)
    # self.highwaya = layers.Highway(256, 1, self.conv3)
    # self.highwayb = layers.Highway(256, 1, self.conv4)
    # self.highwayc = layers.Highway(256, 1, self.conv5)
    self.conv3 = torch.nn.Conv1d(256, 256, 5, padding=2)
    self.conv4 = torch.nn.Conv1d(256, 256, 5, padding=2)
    self.conv5 = torch.nn.Conv1d(256, 256, 5, padding=2)
    self.highwaya = layers.Highway(83, 1, self.conv3)
    self.highwayb = layers.Highway(83, 1, self.conv4)
    self.highwayc = layers.Highway(83, 1, self.conv5)
    self.linear_ae = torch.nn.Linear(256, num_classes)
    self.crf_flag = crf
    if self.crf_flag:
        from allennlp.modules import ConditionalRandomField
        self.crf = ConditionalRandomField(num_classes)
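# A minimal instantiation sketch for the module above. The random matrices and
# their sizes are illustrative assumptions only; real general/domain embeddings
# (e.g. GloVe plus domain-trained vectors) would be loaded from disk, and
# `layers.Highway` comes from the surrounding project.
import numpy as np
import torch

gen_emb = np.random.randn(4000, 300).astype(np.float32)     # hypothetical general embeddings
domain_emb = np.random.randn(4000, 100).astype(np.float32)  # hypothetical domain embeddings

model = Model_cnn(gen_emb, domain_emb, num_classes=3, dropout=0.5, crf=False)
# Both embedding tables are frozen via requires_grad=False, so only the
# conv/highway/linear (and optional CRF) parameters are trainable.
print(sum(p.numel() for p in model.parameters() if p.requires_grad))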
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    # cinn = tf.keras.layers.Input(shape=(self.clen,), name='context_input')
    # qinn = tf.keras.layers.Input(shape=(self.qlen,), name='question_input')
    #
    # embedding_layer = tf.keras.layers.Embedding(self.max_features,
    #                                             self.emb_size,
    #                                             embeddings_initializer='uniform')
    # cemb = embedding_layer(cinn)
    # qemb = embedding_layer(qinn)
    cinn = tf.keras.layers.Input(shape=(2,), name='context_input')
    qinn = tf.keras.layers.Input(shape=(2,), name='question_input')

    # Context: fuse each word's GloVe vector with its char-CNN features
    glove_c_matrix = cinn[0]
    c_char_dict = cinn[1]
    # Convolve and pool each word's character-embedding matrix
    # (tf.layers.* is the TF1-style API; Keras Conv1D/MaxPooling1D layers are the TF2 equivalent)
    cemb = np.zeros(glove_c_matrix.shape)
    for key in c_char_dict:
        char_embedding_matrix = c_char_dict[key]  # a 30x100 matrix per word
        char_embedding = tf.layers.conv1d(char_embedding_matrix, 5, 3)  # 5 filters, kernel 3 -> 28x5
        char_embedding = tf.layers.max_pooling1d(char_embedding, pool_size=2, strides=2)
        # Collapse the channels into a single vector
        char_embedding = tf.concat(char_embedding, axis=0)
        # Fuse the word's GloVe vector with its char features
        cemb[key] = tf.concat([glove_c_matrix[key], char_embedding], axis=0)

    # Question: fuse each word's GloVe vector with its char-CNN features
    glove_q_matrix = qinn[0]
    q_char_dict = qinn[1]
    qemb = np.zeros(glove_q_matrix.shape)
    for key in q_char_dict:
        char_embedding_matrix = q_char_dict[key]  # a 30x100 matrix per word
        char_embedding = tf.layers.conv1d(char_embedding_matrix, 5, 3)
        char_embedding = tf.layers.max_pooling1d(char_embedding, pool_size=2, strides=2)
        char_embedding = tf.concat(char_embedding, axis=0)
        # Fuse the word's GloVe vector with its char features
        qemb[key] = tf.concat([glove_q_matrix[key], char_embedding], axis=0)

    for i in range(self.num_highway_layers):
        """
        Two highway network layers
        """
        highway_layer = layers.Highway(name=f'Highway{i}')
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)
        qemb = qhighway(qemb)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)  # encode the context
    qencode = encoder_layer(qemb)  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    inn = [cinn, qinn]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)

    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    # cinn_c = tf.keras.layers.Input(shape=(self.clen, self.max_char_len), name='context_input_char')
    # qinn_c = tf.keras.layers.Input(shape=(self.qlen, self.max_char_len), name='question_input_char')
    # embedding_layer_char = tf.keras.layers.Embedding(self.max_features, self.emb_size, embeddings_initializer='uniform')
    #
    # emb_cc = embedding_layer_char(cinn_c)
    # emb_qc = embedding_layer_char(qinn_c)
    #
    # c_conv_out = []
    # filter_sizes = sum(list(np.array(self.conv_layers).T[0]))
    # assert filter_sizes == self.emb_size
    # for filters, kernel_size in self.conv_layers:
    #     conv = tf.keras.layers.Conv2D(filters=filters, kernel_size=[kernel_size, self.emb_size],
    #                                   strides=1, activation='relu', padding='same')(emb_cc)
    #     conv = tf.reduce_max(conv, 2)
    #     c_conv_out.append(conv)
    # c_conv_out = tf.keras.layers.concatenate(c_conv_out)
    #
    # q_conv_out = []
    # for filters, kernel_size in self.conv_layers:
    #     conv = tf.keras.layers.Conv2D(filters=filters, kernel_size=[kernel_size, self.emb_size],
    #                                   strides=1, activation='relu', padding='same')(emb_qc)
    #     conv = tf.reduce_max(conv, 2)
    #     q_conv_out.append(conv)
    # q_conv_out = tf.keras.layers.concatenate(q_conv_out)

    cinn_w = tf.keras.layers.Input(shape=(self.clen,), name='context_input_word')
    qinn_w = tf.keras.layers.Input(shape=(self.qlen,), name='question_input_word')
    embedding_layer_word = tf.keras.layers.Embedding(
        self.vocab_size, self.emb_size,
        embeddings_initializer=tf.constant_initializer(np.array(self.embedding_matrix)),
        trainable=False)
    emb_cw = embedding_layer_word(cinn_w)
    emb_qw = embedding_layer_word(qinn_w)
    print('emb_cw', emb_cw.shape)

    # cemb = tf.concat([emb_cw, c_conv_out], axis=2)
    # qemb = tf.concat([emb_qw, q_conv_out], axis=2)
    cemb = emb_cw
    qemb = emb_qw
    print('cemb', cemb.shape)

    for i in range(self.num_highway_layers):
        """
        Two highway network layers
        """
        highway_layer = layers.Highway(name=f'Highway{i}')
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)
        qemb = qhighway(qemb)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)  # encode the context
    qencode = encoder_layer(qemb)  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    inn = [cinn_w, qinn_w]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)

    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
def build_model(self):
    """
    Build the model.
    :return:
    """
    ################### Model definition #######################
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    w_cinn = tf.keras.layers.Input(shape=(self.clen,), name='context_word_input')    # word ids; shape excludes batch_size
    w_qinn = tf.keras.layers.Input(shape=(self.qlen,), name='question_word_input')   # word ids
    # Load the GloVe vectors
    vocab_size = 123252  # vocab_size
    embedding_matrix = preprocess.load_glove()
    word_embedding_layer = tf.keras.layers.Embedding(vocab_size, 300, weights=[embedding_matrix], trainable=False)
    wc_emb = word_embedding_layer(w_cinn)
    wq_emb = word_embedding_layer(w_qinn)

    # Char CNN
    c_cinn = tf.keras.layers.Input(shape=(self.clen, 20), name='context_char_input')    # char ids
    c_qinn = tf.keras.layers.Input(shape=(self.qlen, 20), name='question_char_input')   # char ids
    char_embedding_layer = tf.keras.layers.Embedding(self.max_features, self.emb_size, embeddings_initializer='uniform')
    cc_emb = char_embedding_layer(c_cinn)
    cq_emb = char_embedding_layer(c_qinn)
    cc_emb = tf.reshape(cc_emb, shape=[-1, 20, self.emb_size])
    cq_emb = tf.reshape(cq_emb, shape=[-1, 20, self.emb_size])
    conv1d = tf.keras.layers.Conv1D(filters=6, kernel_size=4, padding='same', activation="relu")  # input_shape
    cc_emb = tf.transpose(cc_emb, perm=[0, 2, 1])
    cq_emb = tf.transpose(cq_emb, perm=[0, 2, 1])
    cc_emb = conv1d(cc_emb)  # [b*seq_len, 6, xx]
    cq_emb = conv1d(cq_emb)
    # Max pooling
    cc_emb = tf.transpose(cc_emb, perm=[0, 2, 1])
    cq_emb = tf.transpose(cq_emb, perm=[0, 2, 1])
    max_pool_1d = tf.keras.layers.GlobalMaxPooling1D()
    cc_emb = tf.reshape(max_pool_1d(cc_emb), shape=[-1, self.clen, 6])
    cq_emb = tf.reshape(max_pool_1d(cq_emb), shape=[-1, self.qlen, 6])
    # Concatenate word and char features
    cemb = tf.concat([wc_emb, cc_emb], axis=-1)
    qemb = tf.concat([wq_emb, cq_emb], axis=-1)
    # Fully connected projection
    dense_1 = tf.keras.layers.Dense(self.emb_size, activation=tf.keras.activations.softmax)
    cemb = dense_1(cemb)
    qemb = dense_1(qemb)

    # cinn = tf.keras.layers.Input(shape=(self.clen,), name='context_input')    # acts like a placeholder
    # qinn = tf.keras.layers.Input(shape=(self.qlen,), name='question_input')
    # embedding_layer = tf.keras.layers.Embedding(self.max_features,
    #                                             self.emb_size,
    #                                             embeddings_initializer='uniform')
    # cemb = embedding_layer(cinn)    # like tf.nn.embedding_lookup()
    # qemb = embedding_layer(qinn)

    # Functional style: each layer wraps the previous layer's output
    for i in range(self.num_highway_layers):
        """
        Two highway network layers
        """
        highway_layer = layers.Highway(name=f'Highway{i}')  # custom Layer
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)  # feed the inputs through
        qemb = qhighway(qemb)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)  # encode the context
    qencode = encoder_layer(qemb)  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')  # similarity
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)  # to be completed
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])  # final output

    # inn = [cinn, qinn]    # inputs
    inn = [c_cinn, w_cinn, c_qinn, w_qinn]  # inputs
    self.model = tf.keras.models.Model(inn, out)  # fixed inputs/outputs (fit data must match; out can be a list for multi-task); replaces Sequential
    self.model.summary(line_length=128)  # print per-layer parameter info, similar to the TF 1.x summary

    ############### Model compilation ######################
    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(
        optimizer=optimizer,             # optimizer
        loss=negative_avg_log_error,     # loss; a list for multi-task, optionally with loss_weights=[w1, w2, ...]
        metrics=[accuracy]               # evaluation metric
    )
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    '''
    layers.Input(
        shape=None, batch_size=None, name=None, dtype=None,
        sparse=False, tensor=None, ragged=False, **kwargs,
    )
    # Word Embedding Layer
    # initialise the word embedding with GloVe
    '''
    cinn_c = tf.keras.layers.Input(shape=(self.clen, self.max_char_len), name='context_input_char')
    qinn_c = tf.keras.layers.Input(shape=(self.qlen, self.max_char_len), name='question_input_char')
    embedding_layer_char = tf.keras.layers.Embedding(self.max_features,  # maximum vocabulary size
                                                     self.emb_size,      # embedding dimension
                                                     embeddings_initializer='uniform')
    '''
    input_dim     vocabulary size (number of distinct tokens)
    output_dim    dimension of the embedding space
    input_length  length of the input sequences
    embeddings_initializer / embeddings_regularizer / embeddings_constraint / mask_zero
    input shape:  2-D tensor (batch_size, input_length)
    output shape: 3-D tensor (batch_size, input_length, output_dim)
    '''
    # Run the char inputs through the embedding to get emb_cc and emb_cq
    # Char Embedding Layer: map each word to a vector space via character-level CNNs
    emb_cc = embedding_layer_char(cinn_c)
    emb_cq = embedding_layer_char(qinn_c)

    # Convolve and pool the context and question separately
    c_conv_out = []
    q_conv_out = []
    filter_sizes = sum(list(np.array(self.conv_layers).T[0]))
    assert filter_sizes == self.emb_size
    # Convolution parameters:
    # filters:     number of filters, i.e. the output dimension
    # kernel_size: filter size; a single integer means equal width and height
    # strides:     horizontal and vertical stride; a single integer means both are equal
    # activation:  activation function (None means linear)
    # padding:     'same' zero-pads so output and input have the same shape ('valid' drops the remainder)
    for filters, kernel_size in self.conv_layers:
        conv = tf.keras.layers.Conv2D(filters=filters, kernel_size=[kernel_size, self.emb_size],
                                      strides=1, activation='relu', padding='same')(emb_cc)
        conv = tf.reduce_max(conv, 2)  # pooling
        c_conv_out.append(conv)
        conv = tf.keras.layers.Conv2D(filters=filters, kernel_size=[kernel_size, self.emb_size],
                                      strides=1, activation='relu', padding='same')(emb_cq)
        conv = tf.reduce_max(conv, 2)  # pooling
        q_conv_out.append(conv)
    c_conv_out = tf.keras.layers.concatenate(c_conv_out)
    q_conv_out = tf.keras.layers.concatenate(q_conv_out)

    cinn_w = tf.keras.layers.Input(shape=(self.clen,), name='context_input_word')
    qinn_w = tf.keras.layers.Input(shape=(self.qlen,), name='question_input_word')
    # Word Embedding Layer: map each word to a vector space via pretrained word embeddings
    embedding_layer_word = tf.keras.layers.Embedding(
        self.vocab_size,  # vocabulary size
        self.emb_size,
        embeddings_initializer=tf.constant_initializer(self.embedding_matrix),
        trainable=False)
    emb_cw = embedding_layer_word(cinn_w)
    emb_qw = embedding_layer_word(qinn_w)
    cemb = tf.concat([emb_cw, c_conv_out], axis=2)  # tf.concat joins arrays along one axis, e.g. into 100 dims
    qemb = tf.concat([emb_qw, q_conv_out], axis=2)

    for i in range(self.num_highway_layers):  # number of highway layers: 2
        """
        Two highway network layers
        """
        highway_layer = layers.Highway(name=f'Highway{i}')
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)
        qemb = qhighway(qemb)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)  # encode the context
    qencode = encoder_layer(qemb)  # encode the question

    # 3. Attention flow layer: combine query and context to give each context word a query-aware representation
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')  # context-to-query attention
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')  # query-to-context attention
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer: scan the context with an RNN
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    inn = [cinn_c, cinn_w, qinn_c, qinn_w]  # all four Input layers feed the graph
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)

    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
def build_model(self):
    cinn = tf.keras.layers.Input(shape=(self.clen,), name='CInn')
    qinn = tf.keras.layers.Input(shape=(self.qlen,), name='QInn')
    embedding_layer = tf.keras.layers.Embedding(self.max_features, self.emb_size)
    cemb = embedding_layer(cinn)
    qemb = embedding_layer(qinn)

    for i in range(self.num_highway_layers):
        highway_layer = layers.Highway(name=f'Highway{i}')
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)
        qemb = qhighway(qemb)

    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)
    qencode = encoder_layer(qemb)

    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    inn = [cinn, qinn]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)
    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
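# A minimal usage sketch for the compiled model above, assuming `bidaf` is the
# instance whose build_model() was just called. The random id arrays are purely
# illustrative stand-ins for the padded context/question word ids produced by the
# project's preprocessing; real training also needs span labels in whatever format
# negative_avg_log_error expects.
import numpy as np

c_ids = np.random.randint(1, bidaf.max_features, size=(8, bidaf.clen))  # hypothetical context ids
q_ids = np.random.randint(1, bidaf.max_features, size=(8, bidaf.qlen))  # hypothetical question ids
# Input ordering matches inn = [cinn, qinn]; the output combines the
# span-begin / span-end probabilities produced by the Combine layer.
probs = bidaf.model.predict([c_ids, q_ids])
print(probs.shape)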
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    # Define the char-level and word-level context/question inputs
    cemb = tf.keras.layers.Input(shape=(self.word_clen, self.word_emb_size), name='word_context_input')
    qemb = tf.keras.layers.Input(shape=(self.word_qlen, self.word_emb_size), name='word_question_input')
    # # Word-level embedding layer
    # word_embedding_layer = tf.keras.layers.Embedding(self.max_word_features, self.word_emb_size, weights=[self.glove_w2vec_matrix])
    # # Char-level embedding layer
    # char_embedding_layer = tf.keras.layers.Embedding(self.max_char_features,
    #                                                  self.char_emb_size,
    #                                                  embeddings_initializer='uniform')
    # # Feed the inputs through each layer
    # char_cemb = char_embedding_layer(char_cinn)
    # char_qemb = char_embedding_layer(char_qinn)
    # word_cemb = word_embedding_layer(word_cinn)
    # word_qemb = word_embedding_layer(word_qinn)
    print(cemb.shape)
    print(qemb.shape)

    # print('cemb{} = []'.format(self.num_highway_layers))
    # exec('cemb{} = []'.format(self.num_highway_layers))
    # print(cemb2)
    # exec('qemb{} = []'.format(self.num_highway_layers))
    # chighway_inputs = []
    # qhighway_inputs = []
    # chighway_inputs.append(cemb)
    # qhighway_inputs.append(qemb)
    highway_layer0 = layers.Highway(name='Highway0')
    chighway0 = tf.keras.layers.TimeDistributed(highway_layer0, name='CHighway0')
    qhighway0 = tf.keras.layers.TimeDistributed(highway_layer0, name='QHighway0')
    cemb1 = chighway0(cemb)
    qemb1 = qhighway0(qemb)
    highway_layer1 = layers.Highway(name='Highway1')
    chighway1 = tf.keras.layers.TimeDistributed(highway_layer1, name='CHighway1')
    qhighway1 = tf.keras.layers.TimeDistributed(highway_layer1, name='QHighway1')
    cemb2 = chighway1(cemb1)
    qemb2 = qhighway1(qemb1)
    # for i in range(self.num_highway_layers):
    #     """
    #     Two highway network layers
    #     """
    #     highway_layer = layers.Highway(name=f'Highway{i}')
    #     chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
    #     qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
    #     chighway_inputs.append(chighway(chighway_inputs[i]))
    #     qhighway_inputs.append(qhighway(qhighway_inputs[i]))

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.word_emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    # cemb_highway = chighway_inputs[-1]
    # qemb_highway = qhighway_inputs[-1]
    cencode = encoder_layer(cemb2)  # encode the context
    qencode = encoder_layer(qemb2)  # encode the question
    # cencode = encoder_layer(exec('cemb{}'.format(self.num_highway_layers)))  # encode the context
    # qencode = encoder_layer(exec('qemb{}'.format(self.num_highway_layers)))  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.word_emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    # inn = [char_cinn, word_cinn, char_qinn, word_qinn]
    inn = [cemb, qemb]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)

    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    ############ Added: load the pretrained embedding matrix
    embedding_matrix = load_embedding_matrix()
    cinn = tf.keras.layers.Input(shape=(self.clen,), name='context_input')
    qinn = tf.keras.layers.Input(shape=(self.qlen,), name='question_input')
    ############ Added: word embedding
    word_embedding = tf.keras.layers.Embedding(
        self.max_features,
        self.emb_size,
        weights=[embedding_matrix],
        trainable=False,
    )
    ############ Added: CNN char embedding
    cnn_char_embedding = tf.keras.layers.Conv1D(self.max_features, 5, activation='tanh', trainable=True)
    # cemb = embedding_layer(cinn)
    # qemb = embedding_layer(qinn)
    ############ Added: combine the two embeddings
    # Apply the word embedding to each input; the char-CNN layer above is not yet
    # wired to a character-id Input, so its features cannot be concatenated in here.
    cemb = word_embedding(cinn)
    qemb = word_embedding(qinn)

    for i in range(self.num_highway_layers):
        """
        Two highway network layers
        """
        highway_layer = layers.Highway(name=f'Highway{i}')
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)
        qemb = qhighway(qemb)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)  # encode the context
    qencode = encoder_layer(qemb)  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    inn = [cinn, qinn]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)
    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # Define the word-level context/question inputs (precomputed embeddings are fed in directly)
    cemb = tf.keras.layers.Input(shape=(self.word_clen, self.word_emb_size), name='word_context_input')
    qemb = tf.keras.layers.Input(shape=(self.word_qlen, self.word_emb_size), name='word_question_input')

    highway_layer0 = layers.Highway(name='Highway0')
    chighway0 = tf.keras.layers.TimeDistributed(highway_layer0, name='CHighway0')
    qhighway0 = tf.keras.layers.TimeDistributed(highway_layer0, name='QHighway0')
    cemb1 = chighway0(cemb)
    qemb1 = qhighway0(qemb)
    highway_layer1 = layers.Highway(name='Highway1')
    chighway1 = tf.keras.layers.TimeDistributed(highway_layer1, name='CHighway1')
    qhighway1 = tf.keras.layers.TimeDistributed(highway_layer1, name='QHighway1')
    cemb2 = chighway1(cemb1)
    qemb2 = qhighway1(qemb1)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.word_emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    # Feed the output of the last (second) highway layer into the encoder
    cencode = encoder_layer(cemb2)  # encode the context
    qencode = encoder_layer(qemb2)  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.word_emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    # inn = [char_cinn, word_cinn, char_qinn, word_qinn]
    inn = [cemb, qemb]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)
    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    # Define the char-level and word-level context/question inputs
    char_cinn = tf.keras.layers.Input(shape=(self.word_clen, self.char_clen,), name='char_context_input')
    char_qinn = tf.keras.layers.Input(shape=(self.word_qlen, self.char_qlen,), name='char_question_input')
    word_cinn = tf.keras.layers.Input(shape=(self.word_clen,), name='word_context_input')
    word_qinn = tf.keras.layers.Input(shape=(self.word_qlen,), name='word_question_input')

    # Word-level embedding layer
    word_embedding_layer = tf.keras.layers.Embedding(
        self.max_word_features, self.word_emb_size, weights=[self.glove_w2vec_matrix])
    # Char-level embedding layer
    char_embedding_layer = tf.keras.layers.Embedding(
        self.max_char_features, self.char_emb_size, embeddings_initializer='uniform')
    # Feed the inputs through each layer
    char_cemb = char_embedding_layer(char_cinn)
    char_qemb = char_embedding_layer(char_qinn)
    word_cemb = word_embedding_layer(word_cinn)
    word_qemb = word_embedding_layer(word_qinn)

    # Context char embeddings go through the CNN to become char-level word features
    char_c_convolution_output = []
    for num_filters, filter_width in self.conv_layers:
        conv = tf.keras.layers.Conv1D(filters=num_filters, kernel_size=filter_width,
                                      activation='relu',
                                      name='Conv1D_C_{}_{}'.format(num_filters, filter_width))(char_cemb)
        # print(conv.shape)
        pool = tf.keras.layers.MaxPool2D(
            data_format='channels_first', pool_size=(conv.shape[2], 1),
            name='MaxPoolingOverTime_C_{}_{}'.format(num_filters, filter_width))(conv)
        # print(pool.shape)
        char_c_convolution_output.append(pool)
    char_cemb = tf.keras.layers.concatenate(char_c_convolution_output, axis=-1)
    char_cemb = tf.squeeze(char_cemb, axis=2)

    # Question char embeddings go through the CNN to become char-level word features
    char_q_convolution_output = []
    for num_filters, filter_width in self.conv_layers:
        conv = tf.keras.layers.Convolution1D(filters=num_filters, kernel_size=filter_width,
                                             activation='relu',
                                             name='Conv1D_Q_{}_{}'.format(num_filters, filter_width))(char_qemb)
        pool = tf.keras.layers.MaxPool2D(
            data_format='channels_first', pool_size=(conv.shape[2], 1),
            name='MaxPoolingOverTime_Q_{}_{}'.format(num_filters, filter_width))(conv)
        char_q_convolution_output.append(pool)
    char_qemb = tf.keras.layers.concatenate(char_q_convolution_output, axis=-1)
    char_qemb = tf.squeeze(char_qemb, axis=2)

    # Concatenate the word-level and char-level features
    cemb = tf.keras.layers.concatenate([word_cemb, char_cemb])
    qemb = tf.keras.layers.concatenate([word_qemb, char_qemb])
    print(cemb.shape)
    print(qemb.shape)

    for i in range(self.num_highway_layers):
        """
        Two highway network layers
        """
        highway_layer = layers.Highway(name=f'Highway{i}')
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)
        qemb = qhighway(qemb)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(130,  # self.word_emb_size
                             recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)  # encode the context
    qencode = encoder_layer(qemb)  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(130,  # self.word_emb_size
                                 recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    inn = [char_cinn, word_cinn, char_qinn, word_qinn]
    # inn = [char_cinn, char_qinn]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)
    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    word_embedding_layer = tf.keras.layers.Embedding(
        self.word_vocab_size, self.emb_size, weights=[self.glove_weight], trainable=False)
    char_embedding_layer = tf.keras.layers.Embedding(
        self.char_vocab_size, self.emb_size, embeddings_initializer='uniform')

    # char ids  # (None, 30, 10)
    cinn_char = tf.keras.layers.Input(shape=(self.clen, max_char_len,), name='context_input_char')
    qinn_char = tf.keras.layers.Input(shape=(self.qlen, max_char_len,), name='question_input_char')
    # word ids  # (None, 30)
    cinn_word = tf.keras.layers.Input(shape=(self.clen,), name='context_input_word')
    qinn_word = tf.keras.layers.Input(shape=(self.qlen,), name='question_input_word')

    # word embeddings
    cemb = word_embedding_layer(cinn_word)  # (None, 30, 50)
    qemb = word_embedding_layer(qinn_word)  # (None, 30, 50)
    # char features
    c_char_emb = char_embedding_layer(cinn_char)  # (None, 30, 10, 50)
    q_char_emb = char_embedding_layer(qinn_char)  # (None, 30, 10, 50)
    cemb_c = self.multi_conv1d(c_char_emb)  # (None, 30, 6)
    qemb_q = self.multi_conv1d(q_char_emb)
    # concatenate word and char features
    cemb = tf.concat([cemb, cemb_c], axis=2)  # (None, 30, 56)
    qemb = tf.concat([qemb, qemb_q], axis=2)

    for i in range(self.num_highway_layers):
        """
        Two highway network layers
        """
        highway_layer = layers.Highway(name=f'Highway{i}')
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)
        qemb = qhighway(qemb)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)  # encode the context
    qencode = encoder_layer(qemb)  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    inn = [cinn_word, qinn_word, cinn_char, qinn_char]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)
    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])
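# The multi_conv1d helper used above is not shown in this file. Below is a minimal
# sketch of what such a char-CNN step could look like, assuming a
# (batch, seq_len, max_char_len, char_emb_size) input and the (batch, seq_len, 6)
# output shape noted in the comments; the filter count and kernel size are
# illustrative assumptions, not the project's actual settings.
def multi_conv1d_sketch(char_emb, filters=6, kernel_size=3):
    # Convolve over the characters of every word independently ...
    conv = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size,
                               padding='same', activation='relu'))(char_emb)
    # ... then max-pool over the character positions (max-over-time pooling),
    # leaving one fixed-size char feature vector per word.
    return tf.keras.layers.TimeDistributed(tf.keras.layers.GlobalMaxPooling1D())(conv)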
def build_model(self):
    """
    Build the model.
    :return:
    """
    # 1. Embedding layer
    # TODO (homework): use GloVe word embeddings (or self-trained w2v) plus a CNN char embedding
    cinn = tf.keras.layers.Input(shape=(self.clen,), name='context_input')
    qinn = tf.keras.layers.Input(shape=(self.qlen,), name='question_input')
    # embedding_layer = tf.keras.layers.Embedding(self.max_features,
    #                                             self.emb_size,
    #                                             embeddings_initializer='uniform')

    # word embedding layer
    word_embed_layer = tf.keras.layers.Embedding(
        self.vocab_size, self.word_embedding_dim,
        weights=[self.embedding_matrix], trainable=False)
    c_w_emb = word_embed_layer(cinn)
    q_w_emb = word_embed_layer(qinn)

    # char embedding layer
    c_embed_layer = tf.keras.layers.Embedding(
        self.max_features, self.emb_size, embeddings_initializer='uniform')
    # TF1-style char-CNN snippet; multi_conv1d, config, Acx, Acq, M, JX and JQ are
    # not defined in this file, so this branch is not wired into the Keras graph yet.
    filter_sizes = list(map(int, config.out_channel_dims.split(',')))
    heights = list(map(int, config.filter_heights.split(',')))
    dco = config.char_out_size
    assert sum(filter_sizes) == dco, (filter_sizes, dco)
    with tf.variable_scope("conv"):
        xx = multi_conv1d(Acx, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
        if config.share_cnn_weights:
            tf.get_variable_scope().reuse_variables()
            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="xx")
        else:
            qq = multi_conv1d(Acq, filter_sizes, heights, "VALID", self.is_train, config.keep_prob, scope="qq")
    xx = tf.reshape(xx, [-1, M, JX, dco])
    qq = tf.reshape(qq, [-1, JQ, dco])

    # Until the char features are connected, the highway layers run on the word embeddings only.
    cemb = c_w_emb
    qemb = q_w_emb

    for i in range(self.num_highway_layers):
        """
        Two highway network layers
        """
        highway_layer = layers.Highway(name=f'Highway{i}')
        chighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'CHighway{i}')
        qhighway = tf.keras.layers.TimeDistributed(highway_layer, name=f'QHighway{i}')
        cemb = chighway(cemb)
        qemb = qhighway(qemb)

    # 2. Contextual embedding layer: bidirectional LSTM encoder
    encoder_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.encoder_dropout,
                             return_sequences=True, name='RNNEncoder'),
        name='BiRNNEncoder')
    cencode = encoder_layer(cemb)  # encode the context
    qencode = encoder_layer(qemb)  # encode the question

    # 3. Attention flow layer
    similarity_layer = layers.Similarity(name='SimilarityLayer')
    similarity_matrix = similarity_layer([cencode, qencode])
    c2q_att_layer = layers.C2QAttention(name='C2QAttention')
    q2c_att_layer = layers.Q2CAttention(name='Q2CAttention')
    c2q_att = c2q_att_layer(similarity_matrix, qencode)
    q2c_att = q2c_att_layer(similarity_matrix, cencode)

    # Build the query-aware context representation
    merged_ctx_layer = layers.MergedContext(name='MergedContext')
    merged_ctx = merged_ctx_layer(cencode, c2q_att, q2c_att)

    # 4. Modeling layer
    modeled_ctx = merged_ctx
    for i in range(self.num_decoders):
        decoder_layer = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(self.emb_size, recurrent_dropout=self.decoder_dropout,
                                 return_sequences=True, name=f'RNNDecoder{i}'),
            name=f'BiRNNDecoder{i}')
        modeled_ctx = decoder_layer(merged_ctx)

    # 5. Output layer
    span_begin_layer = layers.SpanBegin(name='SpanBegin')
    span_begin_prob = span_begin_layer([merged_ctx, modeled_ctx])
    span_end_layer = layers.SpanEnd(name='SpanEnd')
    span_end_prob = span_end_layer([cencode, merged_ctx, modeled_ctx, span_begin_prob])
    output_layer = layers.Combine(name='CombineOutputs')
    out = output_layer([span_begin_prob, span_end_prob])

    inn = [cinn, qinn]
    self.model = tf.keras.models.Model(inn, out)
    self.model.summary(line_length=128)
    optimizer = tf.keras.optimizers.Adadelta(lr=1e-2)
    self.model.compile(optimizer=optimizer, loss=negative_avg_log_error, metrics=[accuracy])