def get_model():
    """Build and compile a BERT-based 4-way softmax classifier.

    ``config_path``, ``checkpoint_path`` and ``learning_rate`` are not
    parameters; they are resolved from the enclosing scope.

    Returns:
        The compiled Keras ``Model`` with two inputs and a 4-unit softmax
        output.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    # Mark every layer of the loaded checkpoint as trainable.
    for layer in encoder.layers:
        layer.trainable = True

    first_in = Input(shape=(None,))
    second_in = Input(shape=(None,))
    sequence_output = encoder([first_in, second_in])

    # Keep only index 0 along axis 1 of the encoder output.
    pooled = Lambda(lambda seq: seq[:, 0])(sequence_output)
    hidden = Dense(64, activation='relu')(pooled)
    probabilities = Dense(4, activation='softmax')(hidden)

    classifier = Model([first_in, second_in], probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(learning_rate),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier
def create_model(self):
    """Assemble a BERT -> BiLSTM -> CRF model from the checkpoint in BASE_MODEL_DIR.

    Reads ``self.max_seq_length``, ``self.lstmDim`` and ``self.label``.

    Returns:
        The Keras ``Model`` (not compiled here) mapping the BERT inputs to
        the CRF output.
    """
    checkpoint_dir = "./{}/".format(BASE_MODEL_DIR)
    config_file = checkpoint_dir + "bert_config.json"
    checkpoint_file = checkpoint_dir + "bert_model.ckpt"
    encoder = load_trained_model_from_checkpoint(
        config_file,
        checkpoint_file,
        seq_len=self.max_seq_length,
    )

    # Make every BERT layer trainable.
    for bert_layer in encoder.layers:
        bert_layer.trainable = True

    recurrent = LSTM(
        self.lstmDim,
        return_sequences=True,
        dropout=0.2,
        recurrent_dropout=0.2,
    )
    sequence_features = Bidirectional(recurrent)(encoder.output)
    tag_output = CRF(len(self.label), sparse_target=True)(sequence_features)

    tagger = Model(encoder.input, tag_output)
    tagger.summary()
    return tagger
def __init__(self, params):
    """Store ``params``, load the pretrained BERT model and build its tokenizer.

    Args:
        params: mapping whose ``"repfile"`` entry is the directory holding
            ``bert_config.json``, ``bert_model.ckpt`` and ``vocab.txt``.
    """
    self.params = params
    self.input_size = 768
    self.tagger = None
    self.maxclauselen = None
    self.maxseqlen = None

    bert_dir = self.params["repfile"]
    self.bert = load_trained_model_from_checkpoint(
        os.path.join(bert_dir, 'bert_config.json'),
        os.path.join(bert_dir, 'bert_model.ckpt'),
    )

    vocabulary = {}
    with codecs.open(os.path.join(bert_dir, 'vocab.txt'), 'r', 'utf8') as vocab_file:
        for raw_line in vocab_file:
            # Each stripped line is a token; its id is the dict size at that moment.
            vocabulary[raw_line.strip()] = len(vocabulary)
    self.tokenizer = Tokenizer(vocabulary)
def build_bert(nclass):
    """Build and compile a BERT classifier with an ``nclass``-way softmax head.

    ``config_path``, ``checkpoint_path`` and ``acc_top2`` are resolved from
    the enclosing scope.

    Args:
        nclass: number of units of the final softmax layer.

    Returns:
        The compiled Keras ``Model``.
    """
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    for layer in bert_model.layers:
        layer.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    # Keep only index 0 along axis 1 of the BERT output.
    x = Lambda(lambda seq: seq[:, 0])(x)
    p = Dense(nclass, activation='softmax')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),
        metrics=['accuracy', acc_top2],
    )
    # summary() prints by itself and returns None; wrapping it in print()
    # only emitted an extra "None" line.
    model.summary()
    return model
def build_bert(nclass):
    """Build and compile a BERT classifier with an ``nclass``-way softmax head.

    ``config_path``, ``checkpoint_path``, ``lr_rate`` and ``f1`` are resolved
    from the enclosing scope. ``lr_rate`` is only read, so no ``global``
    declaration is needed.

    Args:
        nclass: number of units of the final softmax layer.

    Returns:
        The compiled Keras ``Model``.
    """
    # Load the pretrained model.
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    for layer in bert_model.layers:
        layer.trainable = True

    x1_in = Input(shape=(None,))
    x2_in = Input(shape=(None,))
    x = bert_model([x1_in, x2_in])
    # Take the vector at the [CLS] position for classification.
    x = Lambda(lambda seq: seq[:, 0])(x)
    # Single dense layer with softmax output.
    p = Dense(nclass, activation='softmax')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr_rate),  # use a sufficiently small learning rate
        metrics=['accuracy', f1],
    )
    # summary() prints by itself and returns None; wrapping it in print()
    # only emitted an extra "None" line.
    model.summary()
    return model
def build_model(self):
    """Build and compile a BERT binary classifier with a single sigmoid unit.

    Reads ``self.config_path`` and ``self.checkpoint_path``.

    Returns:
        The compiled Keras ``Model``.
    """
    encoder = load_trained_model_from_checkpoint(self.config_path, self.checkpoint_path)
    for layer in encoder.layers:
        layer.trainable = True

    first_in = Input(shape=(None, ))
    second_in = Input(shape=(None, ))
    sequence_output = encoder([first_in, second_in])

    # Take the vector at the [CLS] position for classification.
    cls_vector = Lambda(lambda seq: seq[:, 0])(sequence_output)
    probability = Dense(1, activation='sigmoid')(cls_vector)

    classifier = Model([first_in, second_in], probability)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier
def creat_model(self):
    """Build a BERT -> BiLSTM -> Dropout -> Dense -> CRF model and compile it.

    Reads ``self.config_path``, ``self.check_point_path``, ``self.max_len``,
    ``self.rnn_units``, ``self.drop_rate`` and ``self.n_class``. Stores the
    CRF layer on ``self.crf`` and the model on ``self.model``, then calls
    ``self.compile()``.

    Returns:
        ``self.model``.
    """
    print('load bert Model start!')
    bert = keras_bert.load_trained_model_from_checkpoint(
        self.config_path,
        checkpoint_file=self.check_point_path,
        seq_len=self.max_len,
        trainable=True,
    )
    print('load bert Model end!')

    bert_inputs = bert.inputs
    bert_output = bert.output

    recurrent = LSTM(units=self.rnn_units, return_sequences=True)
    features = Bidirectional(recurrent)(bert_output)
    features = Dropout(self.drop_rate)(features)
    features = Dense(self.n_class)(features)

    self.crf = CRF(self.n_class, sparse_target=False)
    tag_output = self.crf(features)

    self.model = Model(inputs=bert_inputs, outputs=tag_output)
    self.model.summary()
    self.compile()
    return self.model
def bert_model():
    """Build the boundary-scoring model and its training counterpart.

    ``config_path``, ``checkpoint_path`` and ``learning_rate`` are resolved
    from the enclosing scope.

    Returns:
        ``(model, train_model)``. ``model`` maps the two sentence inputs to
        the two masked score tensors and is wrapped with
        ``multi_gpu_model(gpus=2)``. ``train_model`` additionally takes the
        two label inputs, carries the summed cross-entropy loss through
        ``add_loss``, is wrapped with ``multi_gpu_model(gpus=2)`` and is
        compiled with ``Adam(learning_rate)``.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    for layer in encoder.layers:
        layer.trainable = True

    x1_in = Input(shape=(None, ))  # input of the sentence to be recognised
    x2_in = Input(shape=(None, ))  # input of the sentence to be recognised
    s1_in = Input(shape=(None, ))  # entity left boundary (label)
    s2_in = Input(shape=(None, ))  # entity right boundary (label)

    # Float mask with an extra axis at position 2: 1.0 where x1_in > 0, else 0.0.
    x_mask = Lambda(
        lambda ids: K.cast(K.greater(K.expand_dims(ids, 2), 0), 'float32')
    )(x1_in)

    encoded = encoder([x1_in, x2_in])

    # Each head: one bias-free unit per position, then subtract 1e10 wherever
    # the mask is 0.
    start_scores = Dense(1, use_bias=False)(encoded)
    start_scores = Lambda(
        lambda pair: pair[0][..., 0] - (1 - pair[1][..., 0]) * 1e10
    )([start_scores, x_mask])
    end_scores = Dense(1, use_bias=False)(encoded)
    end_scores = Lambda(
        lambda pair: pair[0][..., 0] - (1 - pair[1][..., 0]) * 1e10
    )([end_scores, x_mask])

    model = Model([x1_in, x2_in], [start_scores, end_scores])
    model = multi_gpu_model(model, gpus=2)

    # The training model is created before the end scores are adjusted for
    # the loss, so its outputs are the unadjusted tensors.
    train_model = Model([x1_in, x2_in, s1_in, s2_in], [start_scores, end_scores])

    start_loss = K.mean(K.categorical_crossentropy(s1_in, start_scores, from_logits=True))
    # For the loss only: subtract 1e10 where the cumulative sum of the
    # left-boundary labels along axis 1 is still 0.
    end_scores_for_loss = end_scores - (1 - K.cumsum(s1_in, 1)) * 1e10
    end_loss = K.mean(K.categorical_crossentropy(s2_in, end_scores_for_loss, from_logits=True))
    total_loss = start_loss + end_loss

    train_model.add_loss(total_loss)
    train_model = multi_gpu_model(train_model, gpus=2)
    train_model.compile(optimizer=Adam(learning_rate))
    return model, train_model
def test_load_output_layer_num(self):
    """Load the test checkpoint once per ``output_layer_num`` variant and print each summary."""
    here = os.path.dirname(os.path.abspath(__file__))
    checkpoint_dir = os.path.join(here, 'test_checkpoint')
    config_path = os.path.join(checkpoint_dir, 'bert_config.json')
    model_path = os.path.join(checkpoint_dir, 'bert_model.ckpt')

    # An int, then single-index and multi-index lists, in the original order.
    for layer_num in (4, [0], [1], [-1], [-2], [0, -1]):
        model = load_trained_model_from_checkpoint(
            config_path,
            model_path,
            training=False,
            output_layer_num=layer_num,
        )
        model.summary()
def get_multi_model(self, weight=None):
    """Build and compile a two-headed BERT classifier.

    Reads ``self.config_path``, ``self.checkpoint_path``, ``self.trainable``
    and ``self.class_num`` (indices 0 and 1 give the sizes of the two heads).

    Args:
        weight: optional weights file. When not ``None`` it is loaded into
            the assembled model with ``by_name=True`` before compiling.

    Returns:
        The compiled Keras model named ``"multi_model"`` with softmax outputs
        ``"label1"`` and ``"label2"``.
    """
    bert_model = load_trained_model_from_checkpoint(
        self.config_path, self.checkpoint_path, seq_len=None)

    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source; confirm that the unfreeze loop belongs to
    # the ``self.trainable`` branch and that Dropout belongs to the
    # ``not self.trainable`` branch.
    le = 1e-3
    if self.trainable:
        le = 1e-4
        for layer in bert_model.layers:
            layer.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    # Keep only index 0 along axis 1 of the BERT output.
    x = Lambda(lambda seq: seq[:, 0])(x)

    if not self.trainable:
        x = Dense(128, activation='relu', name="fc1",
                  kernel_regularizer=regularizers.l2(0.01))(x)
        x = Dropout(0.3)(x)

    predict1 = Dense(self.class_num[0], activation='softmax', name="label1")(x)
    predict2 = Dense(self.class_num[1], activation='softmax', name="label2")(x)

    model = Model([x1_in, x2_in], [predict1, predict2], name="multi_model")
    model.summary()

    # Identity comparison is the correct test for "no weights supplied".
    if weight is not None:
        print('loading pre_train weight...')
        model.load_weights(weight, by_name=True)
        print('Done!')

    model.compile(
        loss=['categorical_crossentropy', 'categorical_crossentropy'],
        optimizer=Adam(le),  # use a sufficiently small learning rate
        metrics=['accuracy', 'accuracy'])
    return model
def build_bert(nclass):
    """Build and compile a BERT text classifier.

    Reference: https://kexue.fm/archives/6736

    ``config_path``, ``checkpoint_path`` and ``acc_top2`` are resolved from
    the enclosing scope.

    :param nclass: number of text classes
    :return: the built (and compiled) BERT model
    """
    # Note: although seq_len=None may be set, the sequence length must still
    # not exceed 512.
    # The only line that really invokes BERT is
    # load_trained_model_from_checkpoint; the rest is ordinary Keras.
    # Load the pretrained model.
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)

    for layer in bert_model.layers:
        layer.trainable = True

    # Build the model.
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    # "What principle decides which layers to put after BERT?" Answer: use as
    # few layers as possible to complete the task.
    # E.g. sentiment analysis is just binary classification: take the first
    # vector and add a Dense(1). Do not stack several Dense layers, let alone
    # an LSTM followed by Dense. For sequence labelling (e.g. NER) add
    # Dense + CRF and nothing more.
    # In short, add as little as possible. First, BERT itself is complex
    # enough to handle many tasks. Second, added layers are randomly
    # initialised; too many of them disturb BERT's pretrained weights
    # severely, which easily hurts results or even prevents convergence.

    # x1_in and x2_in are fed to bert_model: BERT is brought in as the encoder.
    x = bert_model([x1_in, x2_in])

    # Lambda wraps an arbitrary expression as a Layer object.
    # Take the vector at the [CLS] position for classification.
    x = Lambda(lambda seq: seq[:, 0])(x)
    p = Dense(nclass, activation='softmax')(x)

    # Reference: https://keras.io/api/models/model/#model-class
    # Model groups layers into an object with training and inference
    # features; only the input and output layers need to be passed.
    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy', acc_top2])
    # summary() prints by itself and returns None; wrapping it in print()
    # only emitted an extra "None" line.
    model.summary()
    return model
def create_text_match_model(num_labels):
    """Build a BERT -> BiLSTM -> (avg + max pooling) -> softmax model.

    ``config_path``, ``checkpoint_path`` and ``maxlen`` are resolved from the
    enclosing scope. The model is not compiled here.

    Args:
        num_labels: number of units of the final softmax layer.

    Returns:
        The Keras ``Model`` built on the BERT model's own inputs.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=maxlen)
    for bert_layer in encoder.layers:
        bert_layer.trainable = True

    # Bi-LSTM over the BERT sequence output.
    recurrent = LSTM(64, return_sequences=True)
    sequence_features = Bidirectional(recurrent)(encoder.output)
    # Identity Lambda; presumably present to drop the mask before pooling -- TODO confirm.
    sequence_features = Lambda(lambda t: t, output_shape=lambda shape: shape)(sequence_features)
    print(sequence_features.shape)

    # Hybrid pooling: concatenate average- and max-pooled features.
    averaged = GlobalAveragePooling1D()(sequence_features)
    maximum = GlobalMaxPooling1D()(sequence_features)
    pooled = concatenate([averaged, maximum])

    probabilities = Dense(num_labels, activation='softmax')(pooled)
    matcher = Model(encoder.input, probabilities)
    matcher.summary()
    return matcher
def build(config):
    """Build (without compiling) a BERT classifier with a 2-way softmax head.

    Args:
        config: object providing ``pretrained_path`` (directory containing
            ``bert_config.json`` and ``bert_model.ckpt``) and
            ``hidden_dropout_prob`` (rate of the dropout before the head).

    Returns:
        The uncompiled Keras ``Model``.
    """
    pretrained_dir = config.pretrained_path
    encoder = load_trained_model_from_checkpoint(
        os.path.join(pretrained_dir, 'bert_config.json'),
        os.path.join(pretrained_dir, 'bert_model.ckpt'),
        seq_len=None,
    )
    for layer in encoder.layers:
        layer.trainable = True

    token_ids = Input(shape=(None, ))    # token ids input
    segment_ids = Input(shape=(None, ))  # segment ids input
    sequence_output = encoder([token_ids, segment_ids])

    first_token = Lambda(lambda seq: seq[:, 0])(sequence_output)  # first token embedding
    first_token = Dropout(config.hidden_dropout_prob)(first_token)
    probabilities = Dense(2, activation='softmax')(first_token)

    return Model([token_ids, segment_ids], probabilities)
def _model_compile_(self):
    """Build, compile and plot a BERT softmax classifier.

    Reads ``self.pretrain_model_dir`` (directory holding
    ``bert_config.json`` and ``bert_model.ckpt``), ``self.n_classes`` and
    ``self.saved_models_dir`` (where the architecture plot is written).

    Returns:
        The compiled Keras ``Model``.
    """
    bert_model = load_trained_model_from_checkpoint(
        os.path.join(self.pretrain_model_dir, "bert_config.json"),
        os.path.join(self.pretrain_model_dir, "bert_model.ckpt"),
        seq_len=None
    )
    for layer in bert_model.layers:
        layer.trainable = True

    # Keep only index 0 along axis 1 of the BERT output.
    x = Lambda(lambda seq: seq[:, 0])(bert_model.output)
    prob = Dense(self.n_classes, activation='softmax')(x)

    model = Model(inputs=bert_model.inputs, outputs=prob)
    model.summary()
    model.compile(
        optimizer=Adam(lr=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    plot_model(
        model,
        to_file=os.path.join(self.saved_models_dir, 'bert_bilstm_model.png'),
        show_shapes=True,
    )
    return model
def get_model():
    """Build and compile a BERT classifier with a 2-way softmax head.

    ``config_path`` and ``checkpoint_path`` are resolved from the enclosing
    scope.

    Returns:
        The compiled Keras ``Model``.
    """
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for layer in bert_model.layers:
        layer.trainable = True

    input1 = Input(shape=(None, ))
    input2 = Input(shape=(None, ))
    # Named ``features`` rather than ``input`` so the builtin is not shadowed.
    features = bert_model([input1, input2])
    # Keep only index 0 along axis 1 of the BERT output.
    features = Lambda(lambda seq: seq[:, 0])(features)
    output = Dense(2, activation='softmax')(features)

    model = Model([input1, input2], output)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'])
    model.summary()
    return model
def model_build(len_train):
    """Build and compile a BERT classifier that also consumes a 2-feature auxiliary input.

    ``config_path``, ``checkpoint_path``, ``MAXLEN``, ``NUM_CLASSES``,
    ``BATCH_SIZE``, ``NUM_EPOCHS``, ``LR`` and ``MIN_LR`` are read from the
    enclosing scope.

    Args:
        len_train: passed to ``calc_train_steps`` together with the batch
            size and epoch count to derive the decay and warm-up steps.

    Returns:
        The compiled Keras ``Model`` with three inputs.
    """
    encoder = load_trained_model_from_checkpoint(
        config_path,
        checkpoint_path,
        seq_len=MAXLEN,
        trainable=True,
    )

    first_in = Input(shape=(None, ))
    second_in = Input(shape=(None, ))
    aux_in = Input(shape=(2, ))

    sequence_output = encoder([first_in, second_in])
    # Index 0 along axis 1 of the BERT output, joined with the auxiliary features.
    pooled = Lambda(lambda seq: seq[:, 0])(sequence_output)
    merged = concatenate([pooled, aux_in])
    probabilities = Dense(NUM_CLASSES, activation='softmax')(merged)

    classifier = Model([first_in, second_in, aux_in], probabilities)

    decay_steps, warmup_steps = calc_train_steps(
        len_train,
        batch_size=BATCH_SIZE,
        epochs=NUM_EPOCHS,
    )
    warmup_optimizer = AdamWarmup(
        decay_steps=decay_steps,
        warmup_steps=warmup_steps,
        lr=LR,
        min_lr=MIN_LR,
    )
    classifier.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=warmup_optimizer,
        metrics=['sparse_categorical_accuracy'],
    )

    # Drop the local reference to the encoder and trigger a collection pass.
    del encoder
    gc.collect()
    return classifier
def get_model():
    """Build and compile a BERT binary classifier with a single sigmoid unit.

    ``config_path`` and ``checkpoint_path`` are resolved from the enclosing
    scope.

    Returns:
        The compiled Keras ``Model``.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for layer in encoder.layers:
        layer.trainable = True

    first_in = Input(shape=(None,))
    second_in = Input(shape=(None,))
    sequence_output = encoder([first_in, second_in])

    # Keep only index 0 along axis 1 of the encoder output.
    pooled = Lambda(lambda seq: seq[:, 0])(sequence_output)
    probability = Dense(1, activation='sigmoid')(pooled)

    classifier = Model([first_in, second_in], probability)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier
def get_model():
    """Build and compile a BERT classifier with a ``num_class``-way softmax head.

    ``config_path``, ``checkpoint_path`` and ``num_class`` are resolved from
    the enclosing scope.

    Returns:
        The compiled Keras ``Model``.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for layer in encoder.layers:
        layer.trainable = True

    first_in = Input(shape=(None, ))
    second_in = Input(shape=(None, ))
    sequence_output = encoder([first_in, second_in])

    # Take the vector at position 0, i.e. CLS.
    cls_vector = Lambda(lambda seq: seq[:, 0])(sequence_output)
    # Softmax is used because this is a multi-class head.
    probabilities = Dense(num_class, activation='softmax')(cls_vector)

    classifier = Model([first_in, second_in], probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    classifier.summary()  # prints the per-layer parameter counts
    return classifier
def load_model():
    """Build the main model and load its trained weights.

    The whole construction runs with ``ss0`` and ``ss0.graph`` set as
    defaults. ``CONFIG`` and ``label`` are resolved from the enclosing scope.

    Returns:
        The model object, with weights loaded from
        ``CONFIG.relation_key_extract_model_path``.
    """
    # NOTE(review): the extent of the ``with`` body was reconstructed from a
    # whitespace-collapsed source; confirm everything up to load_weights
    # belongs inside it.
    with ss0.as_default(), ss0.graph.as_default():
        encoder = load_trained_model_from_checkpoint(
            CONFIG.config_path,
            CONFIG.checkpoint_path,
            seq_len=CONFIG.maxlen,
        )
        first_in = Input(shape=(None, ))
        second_in = Input(shape=(None, ))
        sequence_output = encoder([first_in, second_in])

        recurrent = LSTM(
            CONFIG.lstmDim,
            return_sequences=True,
            dropout=0.2,
            recurrent_dropout=0.2,
        )
        sequence_features = Bidirectional(recurrent)(sequence_output)
        tag_output = CRF(len(label), sparse_target=True)(sequence_features)

        tagger = Model([first_in, second_in], tag_output)
        tagger.load_weights(CONFIG.relation_key_extract_model_path)
    return tagger
def trian_model_bert():
    """Build and compile a BERT binary classifier with a single sigmoid unit.

    ``config_path`` and ``checkpoint_path`` are resolved from the enclosing
    scope.

    Returns:
        The compiled Keras ``Model``.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    for layer in encoder.layers:
        layer.trainable = True

    first_in = Input(shape=(None,))
    second_in = Input(shape=(None,))
    sequence_output = encoder([first_in, second_in])

    # Only the cls position is used for classification.
    cls_vector = Lambda(lambda seq: seq[:, 0])(sequence_output)
    probability = Dense(1, activation='sigmoid')(cls_vector)

    classifier = Model([first_in, second_in], probability)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier
def build_bert(nclass):
    """Build and compile a BERT model with ``nclass`` sigmoid outputs.

    ``config_path``, ``checkpoint_path`` and ``learning_rate`` are resolved
    from the enclosing scope.

    Args:
        nclass: number of units of the final sigmoid layer.

    Returns:
        The compiled Keras ``Model`` (binary cross-entropy loss).
    """
    # Note: although seq_len=None may be set, the sequence length must still
    # not exceed 512.
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    for layer in bert_model.layers:
        layer.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    # Keep only index 0 along axis 1 of the BERT output.
    x = Lambda(lambda seq: seq[:, 0])(x)
    p = Dense(nclass, activation='sigmoid')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate),  # use a sufficiently small learning rate
        metrics=['accuracy'])
    # summary() prints by itself and returns None; wrapping it in print()
    # only emitted an extra "None" line.
    model.summary()
    return model
def get_model():
    """Build and compile a BERT binary classifier with dropout before the sigmoid head.

    ``config_path``, ``checkpoint_path``, ``DROPOUT_RATE`` and
    ``INIT_LEARNING_RATE`` are resolved from the enclosing scope.

    Returns:
        The compiled Keras ``Model``.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)

    # Controls whether BERT is fixed or fine-tuned; here every layer is trainable.
    for bert_layer in encoder.layers:
        bert_layer.trainable = True

    first_in = Input(shape=(None,))
    second_in = Input(shape=(None,))
    sequence_output = encoder([first_in, second_in])

    # Take the vector at the [CLS] position for classification.
    cls_vector = Lambda(lambda seq: seq[:, 0])(sequence_output)
    cls_vector = keras.layers.Dropout(rate=DROPOUT_RATE)(cls_vector)
    probability = Dense(1, activation='sigmoid')(cls_vector)

    classifier = Model([first_in, second_in], probability)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(INIT_LEARNING_RATE),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier
def create_cls_model(num_labels):
    """Build and compile a BERT classifier with a ``num_labels``-way softmax head.

    ``config_path`` and ``checkpoint_path`` are resolved from the enclosing
    scope.

    Args:
        num_labels: number of units of the final softmax layer.

    Returns:
        The compiled Keras ``Model``.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    for bert_layer in encoder.layers:
        bert_layer.trainable = True

    first_in = Input(shape=(None, ))
    second_in = Input(shape=(None, ))
    sequence_output = encoder([first_in, second_in])

    # Take the vector at the [CLS] position for classification.
    cls_vector = Lambda(lambda seq: seq[:, 0])(sequence_output)
    # Multi-class head.
    probabilities = Dense(num_labels, activation='softmax')(cls_vector)

    classifier = Model([first_in, second_in], probabilities)
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier
def create_model(config_path, checkpoint_path):
    """Build and compile a BERT model with 13 sigmoid outputs.

    ``micro_f1`` and ``macro_f1`` are resolved from the enclosing scope.

    Args:
        config_path: BERT config file passed to the checkpoint loader.
        checkpoint_path: BERT checkpoint passed to the checkpoint loader.

    Returns:
        The compiled Keras ``Model``. The previous version built the model
        but fell off the end of the function, so callers received ``None``.
    """
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    for layer in bert_model.layers:
        layer.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    # Keep only index 0 along axis 1 of the BERT output.
    x = Lambda(lambda seq: seq[:, 0])(x)
    p = Dense(13, activation='sigmoid')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=[micro_f1, macro_f1])
    model.summary()
    return model
def build_bert(self):
    """Build and compile a two-pair BERT classifier and store it on ``self.model``.

    The same BERT model encodes two input pairs. Index 0 along axis 1 of
    each encoding is taken, the two vectors are concatenated and passed
    through Dense(500, tanh) -> Dropout(0.5) -> Dense(12, softmax).
    Reads ``self.config_path`` and ``self.checkpoint_path``. Nothing is
    returned.
    """
    encoder = load_trained_model_from_checkpoint(
        self.config_path,
        self.checkpoint_path,
        seq_len=None,
    )
    # Make BERT trainable.
    for bert_layer in encoder.layers:
        bert_layer.trainable = True

    left_first = Input(shape=(None, ))
    left_second = Input(shape=(None, ))
    right_first = Input(shape=(None, ))
    right_second = Input(shape=(None, ))

    left_encoded = encoder([left_first, left_second])
    right_encoded = encoder([right_first, right_second])

    # A single Lambda layer instance is applied to both encodings.
    take_first = Lambda(lambda seq: seq[:, 0])
    left_vector = take_first(left_encoded)
    right_vector = take_first(right_encoded)

    merged = Concatenate(axis=1)([left_vector, right_vector])
    merged = Dense(500, activation='tanh')(merged)
    merged = Dropout(0.5)(merged)
    probabilities = Dense(12, activation='softmax')(merged)

    self.model = Model(
        [left_first, left_second, right_first, right_second],
        probabilities,
    )
    self.model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(1e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    self.model.summary()
def trian_model_bertlstmgru():
    """Build and compile a BERT + BiLSTM + BiGRU binary classifier.

    The BERT sequence output is concatenated on the last axis with the
    BiLSTM -> BiGRU -> Dense(160) features. Index 0 along axis 1 of the
    result feeds a single sigmoid unit. ``config_path`` and
    ``checkpoint_path`` are resolved from the enclosing scope.

    Returns:
        The compiled Keras ``Model``.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
    for layer in encoder.layers:
        layer.trainable = True

    first_in = Input(shape=(None, ))
    second_in = Input(shape=(None, ))

    # Float mask of positions where first_in > 0. It is built but not
    # consumed by anything below.
    mask = Lambda(
        lambda ids: K.cast(K.greater(K.expand_dims(ids, 2), 0), 'float32')
    )(first_in)

    bert_sequence = encoder([first_in, second_in])

    recurrent = Dropout(0.1)(bert_sequence)
    recurrent = Bidirectional(
        LSTM(80, recurrent_dropout=0.1, return_sequences=True)
    )(recurrent)
    recurrent = Bidirectional(
        GRU(80, recurrent_dropout=0.1, return_sequences=True)
    )(recurrent)
    recurrent = Dropout(0.4)(recurrent)
    recurrent = Dense(160)(recurrent)

    print(bert_sequence.shape, recurrent.shape)

    combined = concatenate([bert_sequence, recurrent], axis=-1)
    # Keep only index 0 along axis 1 of the combined features.
    combined = Lambda(lambda seq: seq[:, 0])(combined)
    probability = Dense(1, activation='sigmoid')(combined)

    classifier = Model([first_in, second_in], probability)
    classifier.compile(
        loss='binary_crossentropy',
        optimizer=Adam(2e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier
def prepare_model():
    """Build and compile BERT with a flatten + single-sigmoid-unit top model.

    ``CONFIG_PATH`` and ``CHECKPOINT_PATH`` are resolved from the enclosing
    scope. The dense layer size is the product of dimensions 1 and 2 of the
    BERT output shape, so both must be known when this runs.

    Returns:
        ``(model, top_model_dense)``: the compiled end-to-end model and the
        ``Sequential`` holding the final dense layer.
    """
    bert_model = load_trained_model_from_checkpoint(CONFIG_PATH, CHECKPOINT_PATH)
    bert_model.summary()

    bert_output_shape = bert_model.output.shape.as_list()
    num_bert_outputs = bert_output_shape[1] * bert_output_shape[2]

    top_model_flatten = keras.Sequential([
        # Need lambda because flatten does not support masking
        # https://github.com/keras-team/keras/issues/4978#issuecomment-303985365
        keras.layers.Lambda(lambda x: x, output_shape=lambda s: s,
                            input_shape=bert_output_shape[1:]),
        keras.layers.Flatten(),
    ])
    top_model_flatten.summary()

    top_model_dense = keras.Sequential([
        keras.layers.Dense(1, activation='sigmoid', input_shape=(num_bert_outputs,))
    ])
    top_model_dense.summary()

    top_model = keras.models.Model(
        inputs=top_model_flatten.input,
        outputs=top_model_dense(top_model_flatten.output))
    top_model.summary()

    # https://github.com/keras-team/keras/issues/3465#issuecomment-314633196
    model = keras.models.Model(inputs=bert_model.input,
                               outputs=top_model(bert_model.output))

    # Default learning rate is 0.001. Decrease it to prevent vanishing
    # gradient (predictions all go to 0) because of sigmoid loss.
    # https://ayearofai.com/rohan-4-the-vanishing-gradient-problem-ec68f76ffb9b
    optimizer = keras.optimizers.Adam(lr=0.0003)
    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'])
    model.summary()
    return model, top_model_dense
def get_rcnn_model(config_path, checkpoint_path, train_flag=1):
    """Build and compile a BERT -> 2x BiLSTM -> masked Conv1D -> pooled 3-way classifier.

    Args:
        config_path: BERT config file passed to the checkpoint loader.
        checkpoint_path: BERT checkpoint passed to the checkpoint loader.
        train_flag: when equal to 1 the model is compiled as is; any other
            value first wraps it with ``multi_gpu_model(gpus=2)``.

    Returns:
        The compiled Keras ``Model``.
    """
    encoder = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    for layer in encoder.layers:
        layer.trainable = True

    first_in = Input(shape=(None,))
    second_in = Input(shape=(None,))
    sequence_output = encoder([first_in, second_in])

    recurrent = Bidirectional(LSTM(units=32, return_sequences=True))(sequence_output)
    recurrent = Bidirectional(LSTM(units=32, return_sequences=True))(recurrent)
    convolved = MaskedConv1D(
        filters=64,
        kernel_size=3,
        padding='same',
        activation='relu',
    )(recurrent)

    # Sum of the max-pooled and average-pooled convolution features.
    max_pooled = MaskedGlobalMaxPool1D()(convolved)
    avg_pooled = MaskedGlobalAveragePooling1D()(convolved)
    pooled = Add()([max_pooled, avg_pooled])

    probabilities = Dense(3, activation='softmax')(pooled)
    classifier = Model([first_in, second_in], probabilities)

    if train_flag != 1:
        # gpus is the number of GPUs to use.
        classifier = multi_gpu_model(classifier, gpus=2)

    # Both paths compile with the same settings.
    classifier.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(2e-5),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    classifier.summary()
    return classifier
def build_dis_att_with_bert_zhou():
    """Build and compile an entity-aware attention classifier on a frozen BERT.

    ``config_path``, ``checkpoint_path`` and the helpers
    (``get_entity_vector_zhou``, ``get_entity_shape``, ``slice``,
    ``change_shape``, ``out_change_shape``, ``my_entity_att``,
    ``out_entity_att``, ``NormalAttention``) are resolved from the enclosing
    scope.

    Returns:
        The compiled Keras ``Model`` with inputs ``bert_token``,
        ``bert_segment``, ``bert_m1``, ``bert_m2`` and a 5-way softmax output
        named ``main_output``.
    """
    # One sequence length shared by every input, the BERT loader and the
    # final-position slice.
    seq_len = 250

    bert_token_input = Input(shape=(seq_len,), name='bert_token')
    bert_segment_input = Input(shape=(seq_len,), name='bert_segment')
    bert_m1 = Input(shape=[seq_len], name='bert_m1')
    bert_m2 = Input(shape=[seq_len], name='bert_m2')

    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=seq_len)
    # BERT is kept frozen in this model.
    for layer in bert_model.layers:
        layer.trainable = False

    wordVector = bert_model([bert_token_input, bert_segment_input])

    e1_doc_vec = Lambda(get_entity_vector_zhou, output_shape=get_entity_shape)([wordVector, bert_m1])
    e2_doc_vec = Lambda(get_entity_vector_zhou, output_shape=get_entity_shape)([wordVector, bert_m2])

    # The same dense layer (shared weights) projects both entity vectors.
    entity_dense = Dense(768 * 2, activation='relu')
    e1_doc_vec = entity_dense(e1_doc_vec)
    e2_doc_vec = entity_dense(e2_doc_vec)
    sub = Subtract()([e1_doc_vec, e2_doc_vec])

    # Recurrent encoder over the BERT sequence output.
    encoded_seq = Bidirectional(
        GRU(768, dropout=0.5, recurrent_dropout=0.5, return_sequences=True)
    )(wordVector)
    # ``slice`` here is the project helper of that name, called with
    # h1/h2 = the last index and the sequence length.
    slice_1 = Lambda(slice, arguments={'h1': seq_len - 1, 'h2': seq_len})(encoded_seq)
    slice_1 = Lambda(change_shape, output_shape=out_change_shape)(slice_1)

    att_sub = NormalAttention()([sub, encoded_seq])
    att_e1 = Lambda(my_entity_att, output_shape=out_entity_att)([e1_doc_vec, encoded_seq])
    att_e2 = Lambda(my_entity_att, output_shape=out_entity_att)([e2_doc_vec, encoded_seq])

    z = concatenate([slice_1, att_sub, att_e1, att_e2])
    z = Dropout(0.3)(z)
    z = Dense(256, activation='tanh')(z)
    main_output = Dense(5, activation='softmax', name='main_output')(z)

    model = Model(
        inputs=[bert_token_input, bert_segment_input, bert_m1, bert_m2],
        outputs=main_output)
    model.compile(optimizer="Adam", loss='categorical_crossentropy', metrics=['accuracy'])
    # summary() prints by itself and returns None; wrapping it in print()
    # only emitted an extra "None" line.
    model.summary()
    return model
def model_bert_txt_lstm(config_path, checkpoint_path, metric=f1, max_txt_len=100):
    """Build and compile a BERT binary classifier with max + average pooling.

    ``learning_rate`` is resolved from the enclosing scope.

    Args:
        config_path: BERT config file passed to the checkpoint loader.
        checkpoint_path: BERT checkpoint passed to the checkpoint loader.
        metric: metric passed to ``compile``.
        max_txt_len: fixed length of both input sequences.

    Returns:
        The compiled Keras ``Model``.
    """
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)

    inp_txt_x1 = Input(shape=(max_txt_len, ))
    inp_txt_x2 = Input(shape=(max_txt_len, ))

    # Unfreeze the last 20 layers. The previous loop only evaluated
    # ``layers[-i].trainable`` without assigning to it (a no-op), and its
    # first index, -0, addressed the first layer rather than one of the last.
    for layer in bert_model.layers[-20:]:
        layer.trainable = True

    x1 = bert_model([inp_txt_x1, inp_txt_x2])
    # Identity Lambda; presumably present to drop the mask before pooling -- TODO confirm.
    x1 = Lambda(lambda t: t)(x1)
    x1 = SpatialDropout1D(0.3)(x1)

    max_pool = GlobalMaxPooling1D()(x1)
    avg_pool = GlobalAveragePooling1D()(x1)
    pools = Concatenate()([max_pool, avg_pool])
    predictions = Dense(1, activation='sigmoid')(pools)

    model = Model(inputs=[inp_txt_x1, inp_txt_x2], outputs=predictions)
    adam = optimizers.Adam(lr=learning_rate)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=[metric])
    return model