def set_training_model(self):
    """
    Inherit RNNOIE's model-building function and add transformer layers.
    """
    logging.debug("Setting TransformerLSTMOIE model")

    # Three input layers: word, predicate, and POS-tag index sequences
    word_inputs = Input(shape=(self.sent_maxlen,), dtype="int32", name="word_inputs")
    predicate_inputs = Input(shape=(self.sent_maxlen,), dtype="int32", name="predicate_inputs")
    postags_inputs = Input(shape=(self.sent_maxlen,), dtype="int32", name="postags_inputs")

    # Dropout factory: each call returns a fresh Dropout layer
    dropout = lambda: Dropout(self.pred_dropout)

    # Embedding layers
    word_embedding_layer = self.embed_word()
    pos_embedding_layer = self.embed_pos()

    # Sequential feature layers
    bilstm_layers = self.stack_latent_layers(1)
    position_embedding = Position_Embedding()

    # Transformer layers: stacked multi-head self-attention (8 heads, size 16)
    multi_head_layers = self.stack_attention_layers(8, 16, self.num_of_latent_layers)
    self_attention = Attention(8, 16)

    # Fully connected prediction layer
    predict_layer = self.predict_classes()

    # Wire the graph: concatenate the embeddings, run the BiLSTM stack,
    # then the attention stack
    emb_output = concatenate([dropout()(word_embedding_layer(word_inputs)),
                              dropout()(word_embedding_layer(predicate_inputs)),
                              pos_embedding_layer(postags_inputs)])
    bilstm_output = dropout()(bilstm_layers(emb_output))
    transformer_output = multi_head_layers(bilstm_output)

    # Alternative wirings kept from experimentation:
    # transformer_output = self_attention([bilstm_output, bilstm_output, bilstm_output])
    # conect_output = concatenate([bilstm_output,
    #                              dropout()(word_embedding_layer(predicate_inputs)),
    #                              pos_embedding_layer(postags_inputs)])
    # transformer_output = dropout()(multi_head_layers(conect_output))
    # pos_output = dropout()(multi_head_layers(position_embedding(emb_output)))
    # output = predict_layer(concatenate([bilstm_output, pos_output, emb_output]))
    # output = predict_layer(bilstm_output)

    # Skip-style connection: attention output concatenated with the raw
    # embeddings, batch-normalized, then classified per timestep
    output = predict_layer(BatchNormalization()(
        concatenate([dropout()(transformer_output), emb_output])))

    # Build model
    self.model = Model(inputs=[word_inputs, predicate_inputs, postags_inputs],
                       outputs=[output])

    # Loss
    self.model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['categorical_accuracy'])
    self.model.summary()

    # Save model JSON to file
    self.save_model_to_file(os.path.join(self.model_dir, "model.json"))
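# The BiLSTM stack used above (`stack_latent_layers`) comes from the RNNOIE
# base class and is not shown in this fragment. A minimal sketch consistent
# with the call site, assuming a hypothetical `hidden_units` size and
# return_sequences=True so the attention stack receives a full sequence:

from keras.layers import Bidirectional, LSTM

def stack_latent_layers_sketch(n, hidden_units=128):
    """Return a callable applying n stacked BiLSTMs to a sequence tensor."""
    def apply(x):
        for _ in range(n):
            # Each layer keeps the time dimension so layers can be chained
            x = Bidirectional(LSTM(hidden_units, return_sequences=True))(x)
        return x
    return apply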
batch_size = 32
head_num = 8    # number of attention heads
head_size = 16  # dimension per head

# Model architecture
# Text data: define the text input and its shape (sequences padded to length 30)
text_input = Input(shape=(30,))

# Word embedding, initialized from a pretrained matrix and fine-tuned
em_text = Embedding(len(gakki.word_dic) + 1, 200,
                    weights=[gakki.get_embed_matrix()],
                    trainable=True)(text_input)

# Add positional information, then four stacked multi-head self-attention
# blocks, each followed by batch normalization and dropout
x = Position_Embedding()(em_text)
for _ in range(4):
    x = Attention(head_num, head_size)([x, x, x])
    x = BatchNormalization()(x)
    x = Dropout(0.25)(x)
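# The snippet above stops after the fourth attention block with no output
# head. A plausible completion (an assumption, mirroring the classifier
# pattern in the next snippet): pool over time and attach a dense softmax.

from keras.models import Model
from keras.layers import GlobalAveragePooling1D, Dense
from keras.optimizers import Adam

x_pooled = GlobalAveragePooling1D()(x)           # collapse the time dimension
pred = Dense(2, activation='softmax')(x_pooled)  # hypothetical 2-class head
model = Model(inputs=text_input, outputs=pred)
model.compile(optimizer=Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])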
#%%
batch_size = 5

from keras.models import Model
from keras.optimizers import SGD, Adam
from keras.layers import *
from transformer import Attention, Position_Embedding

max_features = 20000  # vocabulary size (assumed; not defined in the original fragment)

S_inputs = Input(shape=(None,), dtype='int32')
embeddings = Embedding(max_features, 128)(S_inputs)
embeddings = Position_Embedding()(embeddings)
O_seq = Attention(8, 16)([embeddings, embeddings, embeddings])
O_seq = GlobalAveragePooling1D()(O_seq)  # pool the sequence into one vector
O_seq = Dropout(0.5)(O_seq)
outputs = Dense(2, activation='softmax')(O_seq)
model = Model(inputs=S_inputs, outputs=outputs)

# Try using different optimizers and different optimizer configs
opt = Adam(lr=0.0005)
loss = 'categorical_crossentropy'
model.compile(loss=loss,
              optimizer=opt,
              metrics=['accuracy'])  # metrics assumed; the original fragment was truncated here
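# Usage sketch (assumed; no training code appears in the original fragment).
# The model accepts padded integer sequences of any length and 2-class
# one-hot labels, e.g. with dummy data:

import numpy as np

X = np.random.randint(1, max_features, size=(100, 80))  # 100 sequences of length 80
y = np.eye(2)[np.random.randint(0, 2, size=100)]        # one-hot labels
model.fit(X, y, batch_size=batch_size, epochs=2, validation_split=0.1)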
def stack_attention_layers(self, nb_head, size_per_head, n):
    """
    Stack n multi-head attention layers.
    """
    return lambda x: self.attention_stack(
        x, [lambda: Attention(nb_head, size_per_head)] * n)
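# `attention_stack` itself is not shown in this fragment. A minimal sketch
# consistent with the call above: fold each freshly constructed Attention
# layer over the running tensor, feeding it as query, key, and value
# (self-attention), so the n layers are applied in sequence.

from functools import reduce

def attention_stack_sketch(x, layer_constructors):
    """Apply each constructed attention layer to the running output."""
    return reduce(lambda out, make_layer: make_layer()([out, out, out]),
                  layer_constructors, x)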