# -*- coding: utf-8 -*-
# @File   : model.py
# @Author : AaronJny
# @Time   : 2019/12/25
# @Desc   :
from bert4keras.models import build_transformer_model
import tensorflow as tf
from dataset import keep_words
import settings

model = build_transformer_model(settings.CONFIG_PATH,
                                settings.CHECKPOINT_PATH,
                                application='lm',
                                keep_tokens=keep_words)
model.summary()

# Loss function: cross entropy
# From the second token onward, the input itself serves as the correct target
# (the inputs are not one-hot encoded)
y_true = model.input[0][:, 1:]
# Target mask
y_mask = model.get_layer('Embedding-Token').output_mask[:, 1:]
y_mask = tf.cast(y_mask, tf.float32)
# Predictions: stop at the second-to-last position (inclusive)
y_pred = model.output[:, :-1]
cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = tf.reduce_sum(cross_entropy * y_mask) / tf.reduce_sum(y_mask)
model.add_loss(cross_entropy)
model.compile(tf.keras.optimizers.Adam(1e-5))
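
# The shift-by-one alignment above is the heart of the LM loss: the model's output at
# position i is trained to predict the token at position i + 1. A minimal NumPy sketch
# (the token ids below are made up purely for illustration, not part of model.py):
import numpy as np

token_ids = np.array([[101, 8, 15, 23, 42, 102]])  # e.g. [CLS] w1 w2 w3 w4 [SEP]
y_true_demo = token_ids[:, 1:]                     # targets: w1 w2 w3 w4 [SEP]
# Dropping the last output position pairs each prediction with the next input token:
pairs = [(int(a), int(b)) for a, b in zip(token_ids[0, :-1], y_true_demo[0])]
print(pairs)  # [(101, 8), (8, 15), (15, 23), (23, 42), (42, 102)]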
def build_transformer_model_with_unilm():
    """BERT model with UniLM"""
    bert = build_transformer_model(config_path,
                                   with_mlm='linear',
                                   application='unilm',
                                   return_keras_model=False)
    token_ids = bert.model.inputs[0]
    segment_ids = bert.model.inputs[1]
    proba = bert.model.output

    def unilm_loss(inputs, mask=None):
        """Loss computation; needs to be wrapped in a layer"""
        y_true, y_pred, segment_ids = inputs
        y_true, y_pred = y_true[:, 1:], y_pred[:, :-1]
        if mask is None:
            mask = 1.0
        else:
            mask = K.cast(mask[1][:, 1:], floatx)
        segment_ids = K.cast(segment_ids, floatx)
        mask = mask * segment_ids[:, 1:]
        loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
        loss = K.sum(loss * mask) / (K.sum(mask) + K.epsilon())
        return loss

    def unilm_acc(inputs, mask=None):
        """Accuracy computation; needs to be wrapped in a layer"""
        y_true, y_pred, segment_ids = inputs
        y_true, y_pred = K.cast(y_true[:, 1:], floatx), y_pred[:, :-1]
        if mask is None:
            mask = 1.0
        else:
            mask = K.cast(mask[1][:, 1:], floatx)
        segment_ids = K.cast(segment_ids, floatx)
        mask = mask * segment_ids[:, 1:]
        acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        acc = K.sum(acc * mask) / (K.sum(mask) + K.epsilon())
        return acc

    token_proba_segment = [token_ids, proba, segment_ids]
    unilm_loss = Lambda(unilm_loss, name='unilm_loss')(token_proba_segment)
    unilm_acc = Lambda(unilm_acc, name='unilm_acc')(token_proba_segment)
    train_model = Model(bert.model.inputs, [unilm_loss, unilm_acc])
    loss = {
        'unilm_loss': lambda y_true, y_pred: y_pred,
        'unilm_acc': lambda y_true, y_pred: K.stop_gradient(y_pred),
    }
    return bert, train_model, loss
""" 后面的代码使用的是bert类型的模型,如果你用的是albert,那么前几行请改为: model = build_transformer_model( config_path, checkpoint_path, model='albert', ) output_layer = 'Transformer-FeedForward-Norm' output = model.get_layer(output_layer).get_output_at(bert_layers - 1) """ model = build_transformer_model( config_path, checkpoint_path, ) output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1) output = model.get_layer(output_layer).output output = Dense(num_labels)(output) CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier) output = CRF(output) model = Model(model.input, output) model.summary() model.compile(loss=CRF.sparse_loss, optimizer=Adam(learing_rate), metrics=[CRF.sparse_accuracy])
        y_true = y_true[:, 1:]  # target token_ids
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


c_in = Input(shape=(1,))
c = Embedding(num_classes, 128)(c_in)
c = Reshape((128,))(c)

# BERT model
model = build_transformer_model(
    config_path,
    checkpoint_path,
    application='lm',
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to shrink the vocabulary
    layer_norm_cond=c,
    additional_input_layers=c_in,
)

output = CrossEntropy(1)([model.inputs[0], model.outputs[0]])

model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))
model.summary()


class RandomSentiment(AutoRegressiveDecoder):
    """Randomly generate sentences conditioned on a sentiment label (0: negative, 1: positive)"""
    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def compute_loss(self, inputs, mask=None):
        y_true, y_pred = inputs
        if mask[1] is None:
            y_mask = 1.0
        else:
            y_mask = K.cast(mask[1], K.floatx())[:, 1:]
        y_true = y_true[:, 1:]  # target token_ids
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


model = build_transformer_model(
    config_path,
    checkpoint_path,
    application='lm',
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to shrink the vocabulary
)

output = CrossEntropy(1)([model.inputs[0], model.outputs[0]])

model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))
model.summary()


class ChessPlayer(object):
    """Interactive chess-playing program"""
    def move_to_chinese(self, move):
        """Convert a single move into its Chinese description
    num_labels = len(label2id.keys()) * 2 + 1
    return id2label, label2id, num_labels


id2label, label2id, num_labels = get_id2label(label_path="medical_train.ner.labels.json")

max_text_length = 128
batch_size = 16
bert_layers = 3
learing_rate = 1e-5  # the fewer bert_layers, the larger the learning rate should be
crf_lr_multiplier = 1000  # enlarge the CRF layer's learning rate when necessary

# Build the tokenizer
tokenizer = Tokenizer(rbtl_dict_path, do_lower_case=True)

model = build_transformer_model(
    rbtl_config_path,
    rbtl_checkpoint_path,
)

output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labels)(output)
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
output = CRF(output)

model = Model(model.input, output)
model.summary()

model.compile(
    loss=CRF.sparse_loss,
    optimizer=Adam(learing_rate),
    metrics=[CRF.sparse_accuracy]
)
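
# At inference time a CRF head like the one above is normally decoded with Viterbi over
# the emission scores, using the CRF's learned transition matrix. A minimal sketch under
# the assumptions that bert4keras's ViterbiDecoder helper is available, that label id 0
# is the outside ('O') tag, and using the tokenizer/model/CRF objects defined above:
from bert4keras.backend import K
from bert4keras.snippets import ViterbiDecoder, to_array

trans = K.eval(CRF.trans)  # transition matrix learned by the CRF layer
decoder = ViterbiDecoder(trans=trans, starts=[0], ends=[0])

def recognize(text):
    """Return the predicted label-id sequence for one sentence (illustrative helper)."""
    token_ids, segment_ids = tokenizer.encode(text, maxlen=max_text_length)
    token_ids, segment_ids = to_array([token_ids], [segment_ids])
    nodes = model.predict([token_ids, segment_ids])[0]  # per-token emission scores
    labels = decoder.decode(nodes)
    return labels  # map ids back to tag strings via id2label if needed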
            batch_token_ids, batch_segment_ids = [], []


train_generator = data_generator(train_data, batch_size)


# Loss function
class CrossEntropy(Loss):
    def compute_loss(self, inputs, mask=None):
        y_true, y_mask, y_pred = inputs
        y_true = y_true[:, 1:]  # target token_ids
        y_mask = y_mask[:, 1:]  # segment_ids, which conveniently mark the part to be predicted
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


model = build_transformer_model(config_path,
                                checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_tokens)

output = CrossEntropy(2)(model.inputs + model.outputs)

model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))


class AutoTitle(AutoRegressiveDecoder):
    @AutoRegressiveDecoder.set_rtype('probas')
    def predict(self, inputs, output_ids, step):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        segment_ids = np.concatenate([segment_ids, np.ones_like(output_ids)], 1)
        return model.predict([token_ids, segment_ids])[:, -1]

    def generate(self, text, emotion, topk=2):
        max_c_len = maxlen - self.maxlen
        token_ids, segment_ids = tokenizer.encode(text, max_length=max_c_len)
        token_ids[0] = emotion
import numpy as np
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer
from bert4keras.snippets import AutoRegressiveDecoder
from bert4keras.snippets import uniout

config_path = '/root/kg/bert/chinese_nezha_gpt_L-12_H-768_A-12/config.json'
checkpoint_path = '/root/kg/bert/chinese_nezha_gpt_L-12_H-768_A-12/gpt.ckpt'
dict_path = '/root/kg/bert/chinese_nezha_gpt_L-12_H-768_A-12/vocab.txt'

tokenizer = Tokenizer(dict_path, do_lower_case=True)  # build the tokenizer

model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    segment_vocab_size=0,  # drop the segment_ids input
    application='lm',
)  # build the model and load the weights


class ArticleCompletion(AutoRegressiveDecoder):
    """Article continuation based on random sampling"""
    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def predict(self, inputs, output_ids, states):
        token_ids = np.concatenate([inputs[0], output_ids], 1)
        return self.last_token(model).predict(token_ids)

    def generate(self, text, n=1, topp=0.95):
        token_ids = tokenizer.encode(text)[0][:-1]
        results = self.random_sample([token_ids], n, topp=topp)  # random sampling
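
# One possible way to instantiate and call the decoder above. The stop token, the maximum
# length and the prompt are illustrative assumptions, and this assumes the (truncated)
# generate() above goes on to decode and return the sampled continuations:
article_completion = ArticleCompletion(
    start_id=None,
    end_id=tokenizer.token_to_id(u'。'),  # stop at a full-width period
    maxlen=256,
)
print(article_completion.generate(u'今天天气不错', n=2, topp=0.9))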
    def __init__(self, topK):
        self.topK = topK
        self.tokenizer = Tokenizer(Config.BERT_VOCAB_PATH, do_lower_case=True)
        self.model = build_transformer_model(Config.BERT_CONFIG_PATH,
                                             Config.BERT_CHECKPOINT_PATH,
                                             with_mlm=True)
        self.token_ids, self.segment_ids = self.tokenizer.encode(' ')
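
# A model built with with_mlm=True is typically queried by masking a position and ranking
# the vocabulary probabilities predicted at that position. A minimal standalone sketch under
# that assumption (the helper name and arguments are illustrative, not part of the class above):
import numpy as np
from bert4keras.snippets import to_array

def top_k_fill(tokenizer, model, text, mask_position, topk):
    """Rank candidate tokens for one masked position with an MLM head (illustrative helper)."""
    token_ids, segment_ids = tokenizer.encode(text)
    token_ids[mask_position] = tokenizer.token_to_id('[MASK]')
    token_ids, segment_ids = to_array([token_ids], [segment_ids])
    # with_mlm=True: the model output has shape (batch, seq_len, vocab_size) of probabilities
    probas = model.predict([token_ids, segment_ids])[0, mask_position]
    top_ids = np.argsort(probas)[::-1][:topk]
    return [tokenizer.id_to_token(int(i)) for i in top_ids]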
                                                      maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append([label])
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = sequence_padding(batch_labels)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


# Load the pre-trained model
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    with_pool=True,
    return_keras_model=False,
)

output = Dropout(rate=0.1)(bert.model.output)
output = Dense(units=2,
               activation='softmax',
               kernel_initializer=bert.initializer)(output)

model = keras.models.Model(bert.model.input, output)
model.summary()

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(2e-5),  # use a sufficiently small learning rate
    # optimizer=PiecewiseLinearLearningRate(Adam(5e-5), {10000: 1, 30000: 0.1}),
def get_extract_model():
    """
    Build the event-extraction model structure, load its weights and return the model objects.
    1. Use the BERT output to predict the trigger-verb span.
    2. Fuse the BERT output with the trigger span to predict the event time, location,
       subject, object and negation word.
    :return: the individual model objects
    """
    with extract_sess.as_default():
        with extract_sess.graph.as_default():
            # Build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                return_keras_model=False,
                model=bert_config.model_type
            )

            # Assemble the model
            # Trigger-verb span inputs
            trigger_start_in = Input(shape=(None,))
            trigger_end_in = Input(shape=(None,))
            # Trigger index inputs
            trigger_index_start_in = Input(shape=(1,))
            trigger_index_end_in = Input(shape=(1,))
            # Object inputs
            object_start_in = Input(shape=(None,))
            object_end_in = Input(shape=(None,))
            # Subject inputs
            subject_start_in = Input(shape=(None,))
            subject_end_in = Input(shape=(None,))
            # Location inputs
            loc_start_in = Input(shape=(None,))
            loc_end_in = Input(shape=(None,))
            # Time inputs
            time_start_in = Input(shape=(None,))
            time_end_in = Input(shape=(None,))
            # Negation-word inputs
            negative_start_in = Input(shape=(None,))
            negative_end_in = Input(shape=(None,))
            # Copy the externally supplied indices into model-internal variables
            # (only to separate what is used inside the model from the Model inputs)
            trigger_index_start, trigger_index_end = trigger_index_start_in, trigger_index_end_in

            trigger_start_out = Dense(1, activation='sigmoid')(bert_model.model.output)
            trigger_end_out = Dense(1, activation='sigmoid')(bert_model.model.output)
            # Model that predicts the trigger verb
            trigger_model = Model(bert_model.model.inputs, [trigger_start_out, trigger_end_out])

            # Gather the token vectors at the trigger indices
            k1v = Lambda(seq_gather)([bert_model.model.output, trigger_index_start])
            k2v = Lambda(seq_gather)([bert_model.model.output, trigger_index_end])
            kv = Average()([k1v, k2v])
            # Fuse the trigger-verb vectors with the sentence tensor via conditional layer normalization
            t = LayerNormalization(conditional=True)([bert_model.model.output, kv])

            # Object outputs
            object_start_out = Dense(1, activation='sigmoid')(t)
            object_end_out = Dense(1, activation='sigmoid')(t)
            # Subject outputs
            subject_start_out = Dense(1, activation='sigmoid')(t)
            subject_end_out = Dense(1, activation='sigmoid')(t)
            # Location outputs
            loc_start_out = Dense(1, activation='sigmoid')(t)
            loc_end_out = Dense(1, activation='sigmoid')(t)
            # Time outputs
            time_start_out = Dense(1, activation='sigmoid')(t)
            time_end_out = Dense(1, activation='sigmoid')(t)
            # Negation-word outputs
            negative_start_out = Dense(1, activation='sigmoid')(t)
            negative_end_out = Dense(1, activation='sigmoid')(t)

            # Given text and trigger, predict the object
            object_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                 [object_start_out, object_end_out])
            # Given text and trigger, predict the subject
            subject_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                  [subject_start_out, subject_end_out])
            # Given text and trigger, predict the location
            loc_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                              [loc_start_out, loc_end_out])
            # Given text and trigger, predict the time
            time_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                               [time_start_out, time_end_out])
            # Given text and trigger, predict the negation word
            negative_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                   [negative_start_out, negative_end_out])

            # Main training model
            train_model = Model(
                bert_model.model.inputs + [trigger_start_in, trigger_end_in,
                                           trigger_index_start_in, trigger_index_end_in,
                                           object_start_in, object_end_in,
                                           subject_start_in, subject_end_in,
                                           loc_start_in, loc_end_in,
                                           time_start_in, time_end_in,
                                           negative_start_in, negative_end_in],
                [trigger_start_out, trigger_end_out,
                 object_start_out, object_end_out,
                 subject_start_out, subject_end_out,
                 loc_start_out, loc_end_out,
                 time_start_out, time_end_out,
                 negative_start_out, negative_end_out])

            # Load the event-extraction model weights
            logger.info("Loading event-extraction model weights...")
            train_model.load_weights(pre_config.event_extract_model_path)
            logger.info("Event-extraction model weights loaded!")
    return trigger_model, object_model, subject_model, loc_model, time_model, negative_model
train_data = [sogou_data[j] for i, j in enumerate(random_order) if i % 3 != 0]
valid_data = [sogou_data[j] for i, j in enumerate(random_order) if i % 3 == 0]
train_data.extend(train_data)
train_data.extend(webqa_data)  # mix SogouQA and WebQA at a 2:1 ratio

# Load and slim down the vocabulary, then build the tokenizer
token_dict, keep_tokens = load_vocab(
    dict_path=dict_path,
    simplified=True,
    startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]'],
)
tokenizer = Tokenizer(token_dict, do_lower_case=True)

model = build_transformer_model(
    config_path,
    checkpoint_path,
    with_mlm=True,
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to shrink the vocabulary
)

output = Lambda(lambda x: x[:, 1:max_a_len + 1])(model.output)
model = Model(model.input, output)
model.summary()
model.compile(loss=masked_cross_entropy, optimizer=Adam(1e-5))

# Train the model
if not os.path.exists('../model_weight/best_model2.weights'):
    time_s = time.time()
    evaluator = Evaluator()
    train_generator = data_generator(train_data, batch_size)
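
# masked_cross_entropy is referenced above but not defined in this excerpt. A minimal sketch
# of such a loss, under the common assumption for this setup that padded answer positions
# carry token id 0 and should not contribute to the loss:
from bert4keras.backend import K

def masked_cross_entropy(y_true, y_pred):
    """Sparse cross entropy that ignores positions whose target token id is 0 (padding)."""
    y_true = K.reshape(y_true, [K.shape(y_true)[0], -1])
    y_mask = K.cast(K.not_equal(y_true, 0), K.floatx())
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
    cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)
    return cross_entropy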
                                                      text3, maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append([0])
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = sequence_padding(batch_labels)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


# Load the pre-trained model
model = build_transformer_model(config_path=config_path,
                                checkpoint_path=checkpoint_path,
                                model='roformer')

output = GlobalAveragePooling1D()(model.output)
output = Dense(units=1, activation='sigmoid')(output)

model = keras.models.Model(model.input, output)
model.summary()

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(6e-6),
    metrics=['accuracy'],
)

# Convert the datasets
                labels += [0]
            segment_ids = [0] * len(token_ids)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append(labels)
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = sequence_padding(batch_labels)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


model = build_transformer_model(
    config_path,
    checkpoint_path,
    model='electra',
)

output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labels)(output)
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
output = CRF(output)

model = Model(model.input, output)
model.summary()

model.compile(loss=CRF.sparse_loss,
              optimizer=Adam(learing_rate),
                end += [0]
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_start.append(to_categorical(start, 2))
            batch_end.append(to_categorical(end, 2))
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_start = sequence_padding(batch_start)
                batch_end = sequence_padding(batch_end)
                yield [batch_token_ids, batch_segment_ids, batch_start, batch_end], None
                batch_token_ids, batch_segment_ids, batch_start, batch_end = [], [], [], []


bert_model = build_transformer_model(
    config_path=ELECTRA_CONFIG_PATH,
    checkpoint_path=ELECTRA_CHECKPOINT_PATH,
    model='electra'
)

mask = bert_model.input[1]
# print(bert_model.input)
start_labels = Input(shape=(None, 2), name="start-labels")
end_labels = Input(shape=(None, 2), name="end-labels")

output_layers = 'Transformer-%s-FeedForward-Norm' % (bert_layer - 1)
x = bert_model.get_layer(output_layers).output
start_output = Dense(2, activation='sigmoid', name='start')(x)
end_output = Dense(2, activation='sigmoid', name='end')(x)
        y_true, y_pred = inputs
        y_mask = K.cast(K.not_equal(y_true, 0), K.floatx())
        accuracy = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        accuracy = K.sum(accuracy * y_mask) / K.sum(y_mask)
        self.add_metric(accuracy, name='accuracy')
        loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


model = build_transformer_model(
    config_path,
    checkpoint_path,
    with_mlm='linear',
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to shrink the vocabulary
    compound_tokens=compound_tokens,  # added words, initialized from the mean of their character embeddings
)

# Training model
y_in = keras.layers.Input(shape=(None,))
outputs = CrossEntropy(1)([y_in, model.output])
train_model = keras.models.Model(model.inputs + [y_in], outputs)

AdamW = extend_with_weight_decay(Adam, name='AdamW')
AdamWG = extend_with_gradient_accumulation(AdamW, name='AdamWG')
optimizer = AdamWG(
    learning_rate=5e-6,
    weight_decay_rate=0.01,
"""交叉熵作为loss,并mask掉输入部分 """ def compute_loss(self, inputs, mask=None): y_true, y_pred = inputs y_true = y_true[:, 1:] # 目标token_ids y_mask = K.cast(mask[1], K.floatx())[:, :-1] # 解码器自带mask y_pred = y_pred[:, :-1] # 预测序列,错开一位 loss = K.sparse_categorical_crossentropy(y_true, y_pred) loss = K.sum(loss * y_mask) / K.sum(y_mask) return loss t5 = build_transformer_model( config_path=config_path, checkpoint_path=checkpoint_path, keep_tokens=keep_tokens, model='t5.1.1', return_keras_model=False, name='T5', ) encoder = t5.encoder decoder = t5.decoder model = t5.model model.summary() output = CrossEntropy(1)([model.inputs[1], model.outputs[0]]) model = Model(model.inputs, output) model.compile(optimizer=Adam(2e-4))
    with init_graph.as_default():
        output_names = []
        for i in range(len(model.outputs)):
            output_names.append("output_" + str(i + 1))
            tf.identity(model.output[i], "output_" + str(i + 1))
        init_graph = sess.graph.as_graph_def()
        main_graph = graph_util.convert_variables_to_constants(
            sess, init_graph, output_names)
        graph_io.write_graph(main_graph,
                             export_path,
                             name='%s.pb' % output_name,
                             as_text=False)
    return input_names, output_names


if __name__ == '__main__':
    config_path = 'model/albert_tiny_zh_google/albert_config_tiny_g.json'
    checkpoint_path = 'model/albert_tiny_zh_google/albert_model.ckpt'
    dict_path = 'model/albert_tiny_zh_google/vocab.txt'
    output_path = "output/"

    model = build_transformer_model(config_path,
                                    checkpoint_path,
                                    model='albert',
                                    with_pool=True)  # build the model and load the weights
    inputs, outputs = export_graph(model, output_path, "albert_tiny_zh_google")
    print("input_names:" + str(inputs))
    print("output_names:" + str(outputs))
maxlen = 32

# BERT configuration
config_path = '/root/kg/bert/chinese_simbert_L-12_H-768_A-12/bert_config.json'
checkpoint_path = '/root/kg/bert/chinese_simbert_L-12_H-768_A-12/bert_model.ckpt'
dict_path = '/root/kg/bert/chinese_simbert_L-12_H-768_A-12/vocab.txt'

# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# Build and load the model
bert = build_transformer_model(
    config_path,
    checkpoint_path,
    with_pool='linear',
    application='unilm',
    return_keras_model=False,
)

encoder = keras.models.Model(bert.model.inputs, bert.model.outputs[0])
seq2seq = keras.models.Model(bert.model.inputs, bert.model.outputs[1])


class SynonymsGenerator(AutoRegressiveDecoder):
    """seq2seq decoder"""
    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
def E2EModel(bert_config_path, bert_checkpoint_path, LR, num_rels):
    bert_model = build_transformer_model(
        config_path=bert_config_path,
        checkpoint_path=bert_checkpoint_path,
        return_keras_model=True,
    )

    gold_sub_heads_in = keras.layers.Input(shape=(None,))
    gold_sub_tails_in = keras.layers.Input(shape=(None,))
    sub_head_in = keras.layers.Input(shape=(1,))
    sub_tail_in = keras.layers.Input(shape=(1,))
    gold_obj_heads_in = keras.layers.Input(shape=(None, num_rels))
    gold_obj_tails_in = keras.layers.Input(shape=(None, num_rels))
    gold_sub_heads, gold_sub_tails, sub_head, sub_tail, gold_obj_heads, gold_obj_tails = \
        gold_sub_heads_in, gold_sub_tails_in, sub_head_in, sub_tail_in, gold_obj_heads_in, gold_obj_tails_in

    tokens = bert_model.input[0]
    mask = keras.layers.Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(tokens)

    output_layer = 'Transformer-2-FeedForward-Norm'
    tokens_feature = bert_model.get_layer(output_layer).output
    pred_sub_heads = keras.layers.Dense(1, activation='sigmoid')(tokens_feature)
    pred_sub_tails = keras.layers.Dense(1, activation='sigmoid')(tokens_feature)

    subject_model = Model(bert_model.input, [pred_sub_heads, pred_sub_tails])

    sub_head_feature = keras.layers.Lambda(seq_gather)([tokens_feature, sub_head])
    sub_tail_feature = keras.layers.Lambda(seq_gather)([tokens_feature, sub_tail])
    sub_feature = keras.layers.Average()([sub_head_feature, sub_tail_feature])

    tokens_feature = keras.layers.Add()([tokens_feature, sub_feature])
    pred_obj_heads = keras.layers.Dense(num_rels, activation='sigmoid')(tokens_feature)
    pred_obj_tails = keras.layers.Dense(num_rels, activation='sigmoid')(tokens_feature)

    object_model = Model(bert_model.input + [sub_head_in, sub_tail_in],
                         [pred_obj_heads, pred_obj_tails])

    hbt_model = Model(
        bert_model.input + [
            gold_sub_heads_in, gold_sub_tails_in, sub_head_in, sub_tail_in,
            gold_obj_heads_in, gold_obj_tails_in
        ], [pred_sub_heads, pred_sub_tails, pred_obj_heads, pred_obj_tails])

    gold_sub_heads = K.expand_dims(gold_sub_heads, 2)
    gold_sub_tails = K.expand_dims(gold_sub_tails, 2)

    sub_heads_loss = K.binary_crossentropy(gold_sub_heads, pred_sub_heads)
    sub_heads_loss = K.sum(sub_heads_loss * mask) / K.sum(mask)
    sub_tails_loss = K.binary_crossentropy(gold_sub_tails, pred_sub_tails)
    sub_tails_loss = K.sum(sub_tails_loss * mask) / K.sum(mask)

    obj_heads_loss = K.sum(K.binary_crossentropy(gold_obj_heads, pred_obj_heads), 2, keepdims=True)
    obj_heads_loss = K.sum(obj_heads_loss * mask) / K.sum(mask)
    obj_tails_loss = K.sum(K.binary_crossentropy(gold_obj_tails, pred_obj_tails), 2, keepdims=True)
    obj_tails_loss = K.sum(obj_tails_loss * mask) / K.sum(mask)

    loss = (sub_heads_loss + sub_tails_loss) + (obj_heads_loss + obj_tails_loss)

    hbt_model.add_loss(loss)
    hbt_model.compile(optimizer=Adam(LR))
    hbt_model.summary()

    return subject_model, object_model, hbt_model
    'learning_rate': 1e-5,
    'gpu_mem_fraction': 0.7,
    # You should download the following files from the Google BERT research website (pre-trained models)
    'bert_config_path': '/home/hning/adversarial/limited-blackbox-attacks-master/JerryWorkFolder/uncased_L-12_H-768_A-12/bert_config.json',
    'bert_checkpoint_path': '/home/hning/adversarial/limited-blackbox-attacks-master/JerryWorkFolder/uncased_L-12_H-768_A-12/bert_model.ckpt',
    'dict_path': '/home/hning/adversarial/limited-blackbox-attacks-master/JerryWorkFolder/uncased_L-12_H-768_A-12/vocab.txt',
    'bert_layers': 6
}

# Make the architecture
bert = build_transformer_model(config_path=config['bert_config_path'],
                               checkpoint_path=config['bert_checkpoint_path'],
                               return_keras_model=False)

output = Lambda(lambda x: x[:, 0])(bert.model.output)
output = Dense(units=2,
               activation='softmax',
               kernel_initializer=bert.initializer)(output)
model = keras.models.Model(bert.model.input, output)

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(config['learning_rate']),
              metrics=['sparse_categorical_accuracy'])

model.load_weights('Disaster_Rumor_Detection_best_model_1_0.weights')

# Tokenizer
                batch_labels = sequence_padding(batch_labels)
                yield [
                    batch_token_ids, batch_segment_ids, batch_conds
                ], batch_labels
                batch_token_ids, batch_segment_ids = [], []
                batch_conds, batch_labels = [], []


c_in = Input(shape=(1,))
c = Embedding(len(variants), 128)(c_in)
c = Reshape((128,))(c)

model = build_transformer_model(
    config_path,
    checkpoint_path,
    model='roformer',
    layer_norm_cond=c,
    additional_input_layers=c_in
)

output = GlobalAveragePooling1D()(model.output)
output = Dense(2, activation='softmax')(output)

model = Model(model.inputs, output)
model.summary()

AdamEMA = extend_with_exponential_moving_average(Adam, name='AdamEMA')
optimizer = AdamEMA(learing_rate, ema_momentum=0.9999)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
            label_ = [0] * num_classes
            for k in label:
                label_[int(k)] = 1
            batch_labels.append(label_)
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = sequence_padding(batch_labels)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


# Load the pre-trained model
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    model='albert',
    return_keras_model=False,
)

output = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.model.output)
output = Dense(units=num_classes,
               activation='sigmoid',
               kernel_initializer=bert.initializer)(output)

model = keras.models.Model(bert.model.input, output)
model.summary()

AdamLR = extend_with_piecewise_linear_lr(Adam, name='AdamLR')

model.compile(
    loss=focal_loss(gamma=1, alpha=0.9),  # alternatives: get_weight(weight_1=80, weight_0=20), 'binary_crossentropy'
"""交叉熵作为loss,并mask掉输入部分。作用就是只计算目标位置的loss,忽略其他位置的loss。 """ def compute_loss(self, inputs, mask=None): y_true, y_pred = inputs # y_true:[batch_size, sequence_length]。应该是one-hot的表示,有一个地方为1,其他地方为0:[0,0,1,...0] y_mask = K.cast(K.not_equal(y_true, 0), K.floatx()) # y_mask是一个和y_true一致的shape. 1的值还为1.0,0的值还为0.0.即[0.0,0.0,1.0,...0.0]。 # sparse_categorical_accuracy的例子。y_true = 2; y_pred = (0.02, 0.05, 0.83, 0.1); acc = sparse_categorical_accuracy(y_true, y_pred) accuracy = keras.metrics.sparse_categorical_accuracy(y_true, y_pred) accuracy = K.sum(accuracy * y_mask) / K.sum(y_mask) self.add_metric(accuracy, name='accuracy') loss = K.sparse_categorical_crossentropy(y_true, y_pred) loss = K.sum(loss * y_mask) / K.sum(y_mask) return loss # 加载预训练模型 model = build_transformer_model( config_path=config_path, checkpoint_path=checkpoint_path, with_mlm=True ) # 训练用模型 y_in = keras.layers.Input(shape=(None,)) outputs = CrossEntropy(1)([y_in, model.output]) train_model = keras.models.Model(model.inputs + [y_in], outputs) train_model.compile(optimizer=Adam(8e-5)) train_model.summary() # 转换数据集 train_generator = data_generator(train_data, batch_size) valid_generator = data_generator(valid_data, batch_size) test_generator = data_generator(test_data, batch_size)
        normB += b**2
    if normA == 0.0 or normB == 0.0:
        return None
    else:
        return dot_product / ((normA * normB)**0.5)


# config_path = '../config/bert/chinese_L-12_H-768_A-12/bert_config.json'
# checkpoint_path = '/root/kg/bert/chinese_L-12_H-768_A-12/bert_model.ckpt'
# dict_path = '/root/kg/bert/chinese_L-12_H-768_A-12/vocab.txt'
config_path = roberta_dir + '/bert_config.json'
checkpoint_path = roberta_dir + '/bert_model.ckpt'
dict_path = roberta_dir + '/vocab.txt'

tokenizer = Tokenizer(dict_path, do_lower_case=True)  # build the tokenizer
vec_model = build_transformer_model(config_path, checkpoint_path)  # build the model and load the weights


def toids(s):
    token_ids, segment_ids = tokenizer.encode(s)
    token_ids, segment_ids = to_array([token_ids], [segment_ids])
    return [token_ids, segment_ids]


# Encoding test
# token_ids, segment_ids = tokenizer.encode(u'姚明的身高是多少')
# token_ids, segment_ids = to_array([token_ids], [segment_ids])
#
# print('\n ===== predicting =====\n')
# a = model.predict([token_ids, segment_ids])
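
# Usage sketch: vec_model outputs one hidden vector per token, so a crude sentence vector can
# be taken from the [CLS] position and compared with cosine similarity. The pooling choice and
# the example sentences below are assumptions for illustration, not fixed by this file:
import numpy as np

def sentence_vector(s):
    # vec_model.predict returns shape (1, seq_len, hidden_size); take the [CLS] position
    return vec_model.predict(toids(s))[0][0]

v1 = sentence_vector(u'姚明的身高是多少')
v2 = sentence_vector(u'姚明有多高')
print(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))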
"""分词前处理函数 """ return [ w.replace(' ', u'\u2582').replace('\n', u'\u2583') for w in jieba.cut(text, cut_all=False) ] tokenizer = SpTokenizer(spm_path, token_start=None, token_end=None, pre_tokenize=pre_tokenize, token_translate={u'\u2583': '<cls>'}) # 建立分词器 model = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, model='gpt2') # 建立模型,加载权重 class TextExpansion(AutoRegressiveDecoder): """基于随机采样的文本续写 """ @AutoRegressiveDecoder.wraps(default_rtype='probas') def predict(self, inputs, output_ids, states): token_ids = np.concatenate([inputs[0], output_ids], 1) return model.predict(token_ids)[:, -1] def generate(self, text, n=1, topp=0.95, temperature=1): token_ids, _ = tokenizer.encode(text) results = self.random_sample([token_ids], n,
train_data = read_caption('/root/caption/coco/annotations/captions_train2014.json')
valid_data = read_caption('/root/caption/coco/annotations/captions_val2014.json')

# Image model
MobileNetV2 = keras.applications.mobilenet_v2.MobileNetV2
preprocess_input = keras.applications.mobilenet_v2.preprocess_input
image_model = MobileNetV2(include_top=False, pooling='avg')
img_size = 299

# BERT model
model = build_transformer_model(
    config_path,
    checkpoint_path,
    application='lm',
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to shrink the vocabulary
    layer_norm_cond=image_model.output,
    layer_norm_cond_hidden_size=128,
    layer_norm_cond_hidden_act='swish',
    additional_input_layers=image_model.input,
)

model.summary()

# Cross entropy as the loss, with predictions on the input part masked out
y_true = model.input[0][:, 1:]  # target tokens
y_mask = model.get_layer('Embedding-Token').output_mask[:, 1:]  # target mask
y_mask = K.cast(y_mask, K.floatx())  # cast to float
y_pred = model.output[:, :-1]  # predicted tokens, shifted one position from the targets
cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)
                Y2 = sequence_padding(Y2)
                Y3 = sequence_padding(Y3)
                yield [batch_token_ids, batch_segment_ids], [Y1, Y2, Y3]
                batch_token_ids, batch_segment_ids, Y1, Y2, Y3 = [], [], [], [], []


# Additional inputs
# intent_labels = Input(shape=(intent_num,), name='intent_labels')
# domain_labels = Input(shape=(domain_num,), name='domain_labels')
# slot_labels = Input(shape=(None, slot_num), name='slot_labels')

# Build the network
electra_model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    model='electra',
    return_keras_model=False
)

classify_output = Lambda(lambda x: x[:, 0], name='CLS-token')(electra_model.model.output)

# Domain recognition model
domain_output = Dense(domain_num,
                      activation='softmax',
                      kernel_initializer=electra_model.initializer,
                      name='domain_classifier')(classify_output)
domain_model = Model(electra_model.input, domain_output)

# Intent recognition model
intent_output = Dense(intent_num,
                      activation='softmax',
                      kernel_initializer=electra_model.initializer,
                      name='intent_classifier')(classify_output)
intent_model = Model(electra_model.model.input, intent_output)
    num_labels = len(label2id.keys()) * 2 + 1
    return id2label, label2id, num_labels


id2label, label2id, num_labels = get_id2label(
    label_path="../labels/bmes_train.rbtl.labels.json")

max_text_length = 128
batch_size = 16
bert_layers = 12
learing_rate = 1e-5  # the fewer bert_layers, the larger the learning rate should be
crf_lr_multiplier = 1000  # enlarge the CRF layer's learning rate when necessary

# Build the tokenizer
tokenizer = Tokenizer(rbtl_dict_path, do_lower_case=True)

model = build_transformer_model(rbtl_config_path)

output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labels)(output)
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
output = CRF(output)

model = Model(model.input, output)
model.summary()

model.compile(loss=CRF.sparse_loss,
              optimizer=Adam(learing_rate),
              metrics=[CRF.sparse_accuracy])
import numpy as np
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer
from bert4keras.snippets import AutoRegressiveDecoder

# NEZHA configuration
config_path = '/root/kg/bert/nezha_gpt_dialog/config.json'
checkpoint_path = '/root/kg/bert/nezha_gpt_dialog/model.ckpt'
dict_path = '/root/kg/bert/nezha_gpt_dialog/vocab.txt'

# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# Build and load the model
model = build_transformer_model(
    config_path,
    checkpoint_path,
    model='nezha',
    application='lm',
)
model.summary()


class ChatBot(AutoRegressiveDecoder):
    """Chatbot based on random sampling"""
    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        curr_segment_ids = np.ones_like(output_ids) - segment_ids[0, -1]
        segment_ids = np.concatenate([segment_ids, curr_segment_ids], 1)
        return model.predict([token_ids, segment_ids])[:, -1]