# -*- coding: utf-8 -*-
# @File    : model.py
# @Author  : AaronJny
# @Time    : 2019/12/25
# @Desc    :
from bert4keras.models import build_transformer_model
import tensorflow as tf
from dataset import keep_words
import settings

model = build_transformer_model(settings.CONFIG_PATH,
                                settings.CHECKPOINT_PATH,
                                application='lm',
                                keep_tokens=keep_words)

model.summary()

# Loss function: cross entropy
# The input tokens, starting from the second position, serve as the targets (the inputs are not one-hot encoded)
y_true = model.input[0][:, 1:]
# target mask
y_mask = model.get_layer('Embedding-Token').output_mask[:, 1:]
y_mask = tf.cast(y_mask, tf.float32)
# predictions, up to and including the second-to-last position
y_pred = model.output[:, :-1]
cross_entropy = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = tf.reduce_sum(cross_entropy * y_mask) / tf.reduce_sum(y_mask)
model.add_loss(cross_entropy)
model.compile(tf.keras.optimizers.Adam(1e-5))
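# A minimal training sketch for the model above (assumption: `lm_data_generator` is a
# hypothetical generator yielding [batch_token_ids, batch_segment_ids] batches with no
# labels, since the loss is already attached through add_loss).
model.fit(
    lm_data_generator,
    steps_per_epoch=1000,
    epochs=10,
)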
Example #2
def build_transformer_model_with_unilm():
    """带unilm的bert模型
    """
    bert = build_transformer_model(config_path,
                                   with_mlm='linear',
                                   application='unilm',
                                   return_keras_model=False)
    token_ids = bert.model.inputs[0]
    segment_ids = bert.model.inputs[1]
    proba = bert.model.output

    def unilm_loss(inputs, mask=None):
        """计算loss的函数,需要封装为一个层
        """
        y_true, y_pred, segment_ids = inputs
        y_true, y_pred = y_true[:, 1:], y_pred[:, :-1]

        if mask is None:
            mask = 1.0
        else:
            mask = K.cast(mask[1][:, 1:], floatx)

        segment_ids = K.cast(segment_ids, floatx)
        mask = mask * segment_ids[:, 1:]

        loss = K.sparse_categorical_crossentropy(y_true,
                                                 y_pred,
                                                 from_logits=True)
        loss = K.sum(loss * mask) / (K.sum(mask) + K.epsilon())
        return loss

    def unilm_acc(inputs, mask=None):
        """计算准确率的函数,需要封装为一个层
        """
        y_true, y_pred, segment_ids = inputs
        y_true, y_pred = K.cast(y_true[:, 1:], floatx), y_pred[:, :-1]

        if mask is None:
            mask = 1.0
        else:
            mask = K.cast(mask[1][:, 1:], floatx)

        segment_ids = K.cast(segment_ids, floatx)
        mask = mask * segment_ids[:, 1:]

        acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        acc = K.sum(acc * mask) / (K.sum(mask) + K.epsilon())
        return acc

    token_proba_segment = [token_ids, proba, segment_ids]
    unilm_loss = Lambda(unilm_loss, name='unilm_loss')(token_proba_segment)
    unilm_acc = Lambda(unilm_acc, name='unilm_acc')(token_proba_segment)

    train_model = Model(bert.model.inputs, [unilm_loss, unilm_acc])

    loss = {
        'unilm_loss': lambda y_true, y_pred: y_pred,
        'unilm_acc': lambda y_true, y_pred: K.stop_gradient(y_pred),
    }

    return bert, train_model, loss
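# Hypothetical wiring of the function above: the `loss` dict maps each named output
# layer to a lambda that simply returns the model's own output, so compiling with it
# makes Keras minimize the `unilm_loss` output directly (the fit targets are dummies).
bert, train_model, loss = build_transformer_model_with_unilm()
train_model.compile(loss=loss, optimizer=Adam(1e-5))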
Example #3
"""
The code below uses a BERT-type model; if you are using ALBERT, change the first few lines to:

model = build_transformer_model(
    config_path,
    checkpoint_path,
    model='albert',
)

output_layer = 'Transformer-FeedForward-Norm'
output = model.get_layer(output_layer).get_output_at(bert_layers - 1)
"""

model = build_transformer_model(
    config_path,
    checkpoint_path,
)

output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labels)(output)
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
output = CRF(output)

model = Model(model.input, output)
model.summary()

model.compile(loss=CRF.sparse_loss,
              optimizer=Adam(learing_rate),
              metrics=[CRF.sparse_accuracy])
        y_true = y_true[:, 1:]  # target token_ids
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


c_in = Input(shape=(1, ))
c = Embedding(num_classes, 128)(c_in)
c = Reshape((128, ))(c)

# BERT model
model = build_transformer_model(
    config_path,
    checkpoint_path,
    application='lm',
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to slim down the vocabulary
    layer_norm_cond=c,
    additional_input_layers=c_in,
)

output = CrossEntropy(1)([model.inputs[0], model.outputs[0]])

model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))
model.summary()


class RandomSentiment(AutoRegressiveDecoder):
    """Randomly generate sentences given a sentiment label (0: negative, 1: positive).
    """


class CrossEntropy(Loss):
    """Cross entropy as the loss, masking out the input part.
    """
    def compute_loss(self, inputs, mask=None):
        y_true, y_pred = inputs
        if mask[1] is None:
            y_mask = 1.0
        else:
            y_mask = K.cast(mask[1], K.floatx())[:, 1:]
        y_true = y_true[:, 1:]  # target token_ids
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


model = build_transformer_model(
    config_path,
    checkpoint_path,
    application='lm',
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to slim down the vocabulary
)

output = CrossEntropy(1)([model.inputs[0], model.outputs[0]])

model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))
model.summary()


class ChessPlayer(object):
    """Interactive chess-playing program
    """
    def move_to_chinese(self, move):
        """Convert a single move into a Chinese-language description
Example #6
    num_labels = len(label2id.keys()) * 2 + 1
    return id2label, label2id, num_labels


id2label, label2id, num_labels = get_id2label(label_path="medical_train.ner.labels.json")
max_text_length = 128
batch_size = 16
bert_layers = 3
learing_rate = 1e-5  # the smaller bert_layers is, the larger the learning rate should be
crf_lr_multiplier = 1000  # enlarge the learning rate of the CRF layer when necessary

# Build the tokenizer
tokenizer = Tokenizer(rbtl_dict_path, do_lower_case=True)

model = build_transformer_model(
    rbtl_config_path,
    rbtl_checkpoint_path,
)

output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labels)(output)
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
output = CRF(output)

model = Model(model.input, output)
model.summary()

model.compile(
    loss=CRF.sparse_loss,
    optimizer=Adam(learing_rate),
    metrics=[CRF.sparse_accuracy]
Example #7
                batch_token_ids, batch_segment_ids = [], []

train_generator = data_generator(train_data, batch_size)

#  Loss function
class CrossEntropy(Loss):
    def compute_loss(self, inputs, mask=None):
        y_true, y_mask, y_pred = inputs
        y_true = y_true[:, 1:]  # target token_ids
        y_mask = y_mask[:, 1:]  # segment_ids, which exactly mark the part to be predicted
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss

model = build_transformer_model(config_path, checkpoint_path, application='unilm', keep_tokens=keep_tokens)
output = CrossEntropy(2)(model.inputs + model.outputs)
model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))

class AutoTitle(AutoRegressiveDecoder):
    @AutoRegressiveDecoder.set_rtype('probas')
    def predict(self, inputs, output_ids, step):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        segment_ids = np.concatenate([segment_ids, np.ones_like(output_ids)], 1)
        return model.predict([token_ids, segment_ids])[:, -1]
    def generate(self, text, emotion, topk=2):
        max_c_len = maxlen - self.maxlen
        token_ids, segment_ids = tokenizer.encode(text, max_length=max_c_len)
        token_ids[0] = emotion
import numpy as np
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer
from bert4keras.snippets import AutoRegressiveDecoder
from bert4keras.snippets import uniout

config_path = '/root/kg/bert/chinese_nezha_gpt_L-12_H-768_A-12/config.json'
checkpoint_path = '/root/kg/bert/chinese_nezha_gpt_L-12_H-768_A-12/gpt.ckpt'
dict_path = '/root/kg/bert/chinese_nezha_gpt_L-12_H-768_A-12/vocab.txt'

tokenizer = Tokenizer(dict_path, do_lower_case=True)  # build the tokenizer

model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    segment_vocab_size=0,  # remove the segment_ids input
    application='lm',
)  # build the model and load the weights


class ArticleCompletion(AutoRegressiveDecoder):
    """基于随机采样的文章续写
    """
    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def predict(self, inputs, output_ids, states):
        token_ids = np.concatenate([inputs[0], output_ids], 1)
        return self.last_token(model).predict(token_ids)

    def generate(self, text, n=1, topp=0.95):
        token_ids = tokenizer.encode(text)[0][:-1]
        results = self.random_sample([token_ids], n, topp=topp)  # random sampling
    def __init__(self, topK):
        self.topK = topK
        self.tokenizer = Tokenizer(Config.BERT_VOCAB_PATH, do_lower_case=True)
        self.model = build_transformer_model(Config.BERT_CONFIG_PATH, Config.BERT_CHECKPOINT_PATH, with_mlm=True)
        self.token_ids, self.segment_ids = self.tokenizer.encode(' ')
                                                      maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append([label])
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = sequence_padding(batch_labels)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


# Load the pre-trained model
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    with_pool=True,
    return_keras_model=False,
)

output = Dropout(rate=0.1)(bert.model.output)
output = Dense(units=2,
               activation='softmax',
               kernel_initializer=bert.initializer)(output)

model = keras.models.Model(bert.model.input, output)
model.summary()

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(2e-5),  # use a sufficiently small learning rate
    # optimizer=PiecewiseLinearLearningRate(Adam(5e-5), {10000: 1, 30000: 0.1}),
Example #11
def get_extract_model():
    """
    构建事件抽取模型结构,加载模型参数,返回模型对象
    1、使用bert输出预测动词下标
    2、使用bert输出融合动词下标预测事件时间、地点、主语、宾语、否定词
    :return: 各个部分的模型对象
    """
    with extract_sess.as_default():
        with extract_sess.graph.as_default():
            # Build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                return_keras_model=False,
                model=bert_config.model_type
            )

            # Assemble the model
            # trigger (verb) span inputs
            trigger_start_in = Input(shape=(None,))
            trigger_end_in = Input(shape=(None,))
            # trigger index inputs
            trigger_index_start_in = Input(shape=(1,))
            trigger_index_end_in = Input(shape=(1,))
            # object inputs
            object_start_in = Input(shape=(None,))
            object_end_in = Input(shape=(None,))
            # subject inputs
            subject_start_in = Input(shape=(None,))
            subject_end_in = Input(shape=(None,))
            # location inputs
            loc_start_in = Input(shape=(None,))
            loc_end_in = Input(shape=(None,))
            # time inputs
            time_start_in = Input(shape=(None,))
            time_end_in = Input(shape=(None,))
            # negation-word inputs
            negative_start_in = Input(shape=(None,))
            negative_end_in = Input(shape=(None,))
            # Assign the externally supplied indices to internal variables (just to separate the tensors used inside the model from the Model inputs)
            trigger_index_start, trigger_index_end = trigger_index_start_in, trigger_index_end_in

            trigger_start_out = Dense(1, activation='sigmoid')(bert_model.model.output)
            trigger_end_out = Dense(1, activation='sigmoid')(bert_model.model.output)
            # model that predicts the trigger verb
            trigger_model = Model(bert_model.model.inputs, [trigger_start_out, trigger_end_out])

            # Gather token vectors at the trigger indices
            k1v = Lambda(seq_gather)([bert_model.model.output, trigger_index_start])
            k2v = Lambda(seq_gather)([bert_model.model.output, trigger_index_end])
            kv = Average()([k1v, k2v])
            # Fuse the trigger vectors with the sentence tensor via conditional layer normalization
            t = LayerNormalization(conditional=True)([bert_model.model.output, kv])

            # object model outputs
            object_start_out = Dense(1, activation='sigmoid')(t)
            object_end_out = Dense(1, activation='sigmoid')(t)
            # subject model outputs
            subject_start_out = Dense(1, activation='sigmoid')(t)
            subject_end_out = Dense(1, activation='sigmoid')(t)
            # location model outputs
            loc_start_out = Dense(1, activation='sigmoid')(t)
            loc_end_out = Dense(1, activation='sigmoid')(t)
            # time model outputs
            time_start_out = Dense(1, activation='sigmoid')(t)
            time_end_out = Dense(1, activation='sigmoid')(t)
            # negation-word model outputs
            negative_start_out = Dense(1, activation='sigmoid')(t)
            negative_end_out = Dense(1, activation='sigmoid')(t)

            # Given text and trigger, predict the object
            object_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                 [object_start_out, object_end_out])
            # Given text and trigger, predict the subject
            subject_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                  [subject_start_out, subject_end_out])
            # Given text and trigger, predict the location
            loc_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                              [loc_start_out, loc_end_out])
            # Given text and trigger, predict the time
            time_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                               [time_start_out, time_end_out])
            # Given text and trigger, predict the negation word
            negative_model = Model(bert_model.model.inputs + [trigger_index_start_in, trigger_index_end_in],
                                   [negative_start_out, negative_end_out])

            # Main training model
            train_model = Model(
                bert_model.model.inputs + [trigger_start_in, trigger_end_in, trigger_index_start_in, trigger_index_end_in,
                                           object_start_in, object_end_in, subject_start_in, subject_end_in, loc_start_in,
                                           loc_end_in, time_start_in, time_end_in, negative_start_in, negative_end_in],
                [trigger_start_out, trigger_end_out, object_start_out, object_end_out, subject_start_out, subject_end_out,
                 loc_start_out, loc_end_out, time_start_out, time_end_out, negative_start_out, negative_end_out])
            # Load the event-extraction model weights
            logger.info("Loading event-extraction model weights...")
            train_model.load_weights(pre_config.event_extract_model_path)
            logger.info("Event-extraction model weights loaded!")

    return trigger_model, object_model, subject_model, loc_model, time_model, negative_model
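# A rough inference sketch for the trigger model returned above (assumptions: `tokenizer`
# and `to_array` are available as in the other snippets, the example sentence is arbitrary,
# and prediction runs inside the same `extract_sess` graph the model was built in).
import numpy as np

trigger_model, object_model, subject_model, loc_model, time_model, negative_model = get_extract_model()
with extract_sess.as_default():
    with extract_sess.graph.as_default():
        token_ids, segment_ids = tokenizer.encode(u'昨天公司在上海召开了发布会')
        token_ids, segment_ids = to_array([token_ids], [segment_ids])
        start_probs, end_probs = trigger_model.predict([token_ids, segment_ids])
        # token positions whose sigmoid score exceeds 0.5 are treated as trigger boundaries
        trigger_starts = np.where(start_probs[0, :, 0] > 0.5)[0]
        trigger_ends = np.where(end_probs[0, :, 0] > 0.5)[0]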
Example #12
    train_data = [sogou_data[j] for i, j in enumerate(random_order) if i % 3 != 0]
    valid_data = [sogou_data[j] for i, j in enumerate(random_order) if i % 3 == 0]
    train_data.extend(train_data)
    train_data.extend(webqa_data)  # mix SogouQA and WebQA at a 2:1 ratio

    # Load and simplify the vocabulary, then build the tokenizer
    token_dict, keep_tokens = load_vocab(
        dict_path=dict_path,
        simplified=True,
        startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]'],
    )
    tokenizer = Tokenizer(token_dict, do_lower_case=True)

    model = build_transformer_model(
        config_path,
        checkpoint_path,
        with_mlm=True,
        keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to slim down the vocabulary
    )
    output = Lambda(lambda x: x[:, 1:max_a_len + 1])(model.output)
    model = Model(model.input, output)
    model.summary()

    model.compile(loss=masked_cross_entropy, optimizer=Adam(1e-5))



    # Train the model
    if not os.path.exists('../model_weight/best_model2.weights'):
        time_s = time.time()
        evaluator = Evaluator()
        train_generator = data_generator(train_data, batch_size)
Example #13
                                                      text3,
                                                      maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append([0])
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = sequence_padding(batch_labels)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


# Load the pre-trained model
model = build_transformer_model(config_path=config_path,
                                checkpoint_path=checkpoint_path,
                                model='roformer')

output = GlobalAveragePooling1D()(model.output)
output = Dense(units=1, activation='sigmoid')(output)

model = keras.models.Model(model.input, output)
model.summary()

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(6e-6),
    metrics=['accuracy'],
)

# Convert the datasets
Example #14
            labels += [0]
            segment_ids = [0] * len(token_ids)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append(labels)
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = sequence_padding(batch_labels)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


model = build_transformer_model(
    config_path,
    checkpoint_path,
    model='electra',
)

output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output

output = Dense(num_labels)(output)
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
output = CRF(output)

model = Model(model.input, output)
model.summary()

model.compile(loss=CRF.sparse_loss,
              optimizer=Adam(learing_rate),
            end += [0]
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_start.append(to_categorical(start,2))
            batch_end.append(to_categorical(end,2))
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_start = sequence_padding(batch_start)
                batch_end = sequence_padding(batch_end)
                yield [batch_token_ids,batch_segment_ids,batch_start,batch_end],None
                batch_token_ids,batch_segment_ids,batch_start,batch_end = [],[],[],[]

bert_model = build_transformer_model(
    config_path=ELECTRA_CONFIG_PATH,
    checkpoint_path=ELECTRA_CHECKPOINT_PATH,
    model='electra'
)


mask = bert_model.input[1]
# print(bert_model.input)
start_labels = Input(shape=(None,2),name="start-labels")
end_labels = Input(shape=(None,2),name="end-labels")

output_layers = 'Transformer-%s-FeedForward-Norm' % (bert_layer -1)
x = bert_model.get_layer(output_layers).output

start_output = Dense(2,activation='sigmoid',name='start')(x)
end_output = Dense(2,activation='sigmoid',name='end')(x)
        y_true, y_pred = inputs
        y_mask = K.cast(K.not_equal(y_true, 0), K.floatx())
        accuracy = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        accuracy = K.sum(accuracy * y_mask) / K.sum(y_mask)
        self.add_metric(accuracy, name='accuracy')
        loss = K.sparse_categorical_crossentropy(y_true,
                                                 y_pred,
                                                 from_logits=True)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


model = build_transformer_model(
    config_path,
    checkpoint_path,
    with_mlm='linear',
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to slim down the vocabulary
    compound_tokens=compound_tokens,  # added words, initialized as the average of their character embeddings
)

# Model used for training
y_in = keras.layers.Input(shape=(None, ))
outputs = CrossEntropy(1)([y_in, model.output])

train_model = keras.models.Model(model.inputs + [y_in], outputs)

AdamW = extend_with_weight_decay(Adam, name='AdamW')
AdamWG = extend_with_gradient_accumulation(AdamW, name='AdamWG')
optimizer = AdamWG(
    learning_rate=5e-6,
    weight_decay_rate=0.01,
    """交叉熵作为loss,并mask掉输入部分
    """
    def compute_loss(self, inputs, mask=None):
        y_true, y_pred = inputs
        y_true = y_true[:, 1:]  # target token_ids
        y_mask = K.cast(mask[1], K.floatx())[:, :-1]  # the decoder provides its own mask
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


t5 = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    keep_tokens=keep_tokens,
    model='t5.1.1',
    return_keras_model=False,
    name='T5',
)

encoder = t5.encoder
decoder = t5.decoder
model = t5.model
model.summary()

output = CrossEntropy(1)([model.inputs[1], model.outputs[0]])

model = Model(model.inputs, output)
model.compile(optimizer=Adam(2e-4))

Example #18
        with init_graph.as_default():
            output_names = []

            for i in range(len(model.outputs)):
                output_names.append("output_" + str(i + 1))
                tf.identity(model.outputs[i], "output_" + str(i + 1))

            init_graph = sess.graph.as_graph_def()
            main_graph = graph_util.convert_variables_to_constants(
                sess, init_graph, output_names)
            graph_io.write_graph(main_graph,
                                 export_path,
                                 name='%s.pb' % output_name,
                                 as_text=False)

    return input_names, output_names


if __name__ == '__main__':
    config_path = 'model/albert_tiny_zh_google/albert_config_tiny_g.json'
    checkpoint_path = 'model/albert_tiny_zh_google/albert_model.ckpt'
    dict_path = 'model/albert_tiny_zh_google/vocab.txt'
    output_path = "output/"
    model = build_transformer_model(config_path,
                                    checkpoint_path,
                                    model='albert',
                                    with_pool=True)  # build the model, load the weights
    inputs, outputs = export_graph(model, output_path, "albert_tiny_zh_google")
    print("input_names:" + str(inputs))
    print("output_names:" + str(outputs))
maxlen = 32

# BERT configuration
config_path = '/root/kg/bert/chinese_simbert_L-12_H-768_A-12/bert_config.json'
checkpoint_path = '/root/kg/bert/chinese_simbert_L-12_H-768_A-12/bert_model.ckpt'
dict_path = '/root/kg/bert/chinese_simbert_L-12_H-768_A-12/vocab.txt'

# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# Build and load the model
bert = build_transformer_model(
    config_path,
    checkpoint_path,
    with_pool='linear',
    application='unilm',
    return_keras_model=False,
)

encoder = keras.models.Model(bert.model.inputs, bert.model.outputs[0])
seq2seq = keras.models.Model(bert.model.inputs, bert.model.outputs[1])


class SynonymsGenerator(AutoRegressiveDecoder):
    """seq2seq解码器
    """
    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
Example #20
def E2EModel(bert_config_path, bert_checkpoint_path, LR, num_rels):
    bert_model = build_transformer_model(
        config_path=bert_config_path,
        checkpoint_path=bert_checkpoint_path,
        return_keras_model=True,
    )

    gold_sub_heads_in = keras.layers.Input(shape=(None, ))
    gold_sub_tails_in = keras.layers.Input(shape=(None, ))
    sub_head_in = keras.layers.Input(shape=(1, ))
    sub_tail_in = keras.layers.Input(shape=(1, ))
    gold_obj_heads_in = keras.layers.Input(shape=(None, num_rels))
    gold_obj_tails_in = keras.layers.Input(shape=(None, num_rels))

    gold_sub_heads, gold_sub_tails, sub_head, sub_tail, gold_obj_heads, gold_obj_tails = gold_sub_heads_in, gold_sub_tails_in, sub_head_in, sub_tail_in, gold_obj_heads_in, gold_obj_tails_in
    tokens = bert_model.input[0]
    mask = keras.layers.Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(tokens)

    output_layer = 'Transformer-2-FeedForward-Norm'
    tokens_feature = bert_model.get_layer(output_layer).output
    pred_sub_heads = keras.layers.Dense(1,
                                        activation='sigmoid')(tokens_feature)
    pred_sub_tails = keras.layers.Dense(1,
                                        activation='sigmoid')(tokens_feature)

    subject_model = Model(bert_model.input, [pred_sub_heads, pred_sub_tails])

    sub_head_feature = keras.layers.Lambda(seq_gather)(
        [tokens_feature, sub_head])
    sub_tail_feature = keras.layers.Lambda(seq_gather)(
        [tokens_feature, sub_tail])
    sub_feature = keras.layers.Average()([sub_head_feature, sub_tail_feature])

    tokens_feature = keras.layers.Add()([tokens_feature, sub_feature])
    pred_obj_heads = keras.layers.Dense(num_rels,
                                        activation='sigmoid')(tokens_feature)
    pred_obj_tails = keras.layers.Dense(num_rels,
                                        activation='sigmoid')(tokens_feature)

    object_model = Model(bert_model.input + [sub_head_in, sub_tail_in],
                         [pred_obj_heads, pred_obj_tails])

    hbt_model = Model(
        bert_model.input + [
            gold_sub_heads_in, gold_sub_tails_in, sub_head_in, sub_tail_in,
            gold_obj_heads_in, gold_obj_tails_in
        ], [pred_sub_heads, pred_sub_tails, pred_obj_heads, pred_obj_tails])

    gold_sub_heads = K.expand_dims(gold_sub_heads, 2)
    gold_sub_tails = K.expand_dims(gold_sub_tails, 2)

    sub_heads_loss = K.binary_crossentropy(gold_sub_heads, pred_sub_heads)
    sub_heads_loss = K.sum(sub_heads_loss * mask) / K.sum(mask)
    sub_tails_loss = K.binary_crossentropy(gold_sub_tails, pred_sub_tails)
    sub_tails_loss = K.sum(sub_tails_loss * mask) / K.sum(mask)

    obj_heads_loss = K.sum(K.binary_crossentropy(gold_obj_heads,
                                                 pred_obj_heads),
                           2,
                           keepdims=True)
    obj_heads_loss = K.sum(obj_heads_loss * mask) / K.sum(mask)
    obj_tails_loss = K.sum(K.binary_crossentropy(gold_obj_tails,
                                                 pred_obj_tails),
                           2,
                           keepdims=True)
    obj_tails_loss = K.sum(obj_tails_loss * mask) / K.sum(mask)

    loss = (sub_heads_loss + sub_tails_loss) + (obj_heads_loss +
                                                obj_tails_loss)

    hbt_model.add_loss(loss)
    hbt_model.compile(optimizer=Adam(LR))
    hbt_model.summary()

    return subject_model, object_model, hbt_model
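# Hypothetical usage of the E2EModel defined above: hbt_model already carries its loss via
# add_loss, so fitting only needs input batches from a data generator (bert_config_path,
# bert_checkpoint_path, num_rels, and train_generator are assumed to be defined elsewhere).
subject_model, object_model, hbt_model = E2EModel(bert_config_path, bert_checkpoint_path, LR=1e-5, num_rels=num_rels)
hbt_model.fit(train_generator, steps_per_epoch=1000, epochs=10)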
    'learning_rate': 1e-5,
    'gpu_mem_fraction': 0.7,

    # You should download the following files from Google BERT research website(pre-trained models)
    'bert_config_path':
    '/home/hning/adversarial/limited-blackbox-attacks-master/JerryWorkFolder/uncased_L-12_H-768_A-12/bert_config.json',
    'bert_checkpoint_path':
    '/home/hning/adversarial/limited-blackbox-attacks-master/JerryWorkFolder/uncased_L-12_H-768_A-12/bert_model.ckpt',
    'dict_path':
    '/home/hning/adversarial/limited-blackbox-attacks-master/JerryWorkFolder/uncased_L-12_H-768_A-12/vocab.txt',
    'bert_layers': 6
}

# Make the architecture
bert = build_transformer_model(config_path=config['bert_config_path'],
                               checkpoint_path=config['bert_checkpoint_path'],
                               return_keras_model=False)

output = Lambda(lambda x: x[:, 0])(bert.model.output)
output = Dense(units=2,
               activation='softmax',
               kernel_initializer=bert.initializer)(output)
model = keras.models.Model(bert.model.input, output)

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(config['learning_rate']),
              metrics=['sparse_categorical_accuracy'])

model.load_weights('Disaster_Rumor_Detection_best_model_1_0.weights')

# Tokenizer
Example #22
                batch_labels = sequence_padding(batch_labels)
                yield [
                    batch_token_ids, batch_segment_ids, batch_conds
                ], batch_labels
                batch_token_ids, batch_segment_ids = [], []
                batch_conds, batch_labels = [], []


c_in = Input(shape=(1,))
c = Embedding(len(variants), 128)(c_in)
c = Reshape((128,))(c)

model = build_transformer_model(
    config_path,
    checkpoint_path,
    model='roformer',
    layer_norm_cond=c,
    additional_input_layers=c_in
)

output = GlobalAveragePooling1D()(model.output)
output = Dense(2, activation='softmax')(output)

model = Model(model.inputs, output)
model.summary()

AdamEMA = extend_with_exponential_moving_average(Adam, name='AdamEMA')
optimizer = AdamEMA(learing_rate, ema_momentum=0.9999)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
Example #23
            label_ = [0] * num_classes
            for k in label:
                label_[int(k)] = 1
            batch_labels.append(label_)
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_labels = sequence_padding(batch_labels)
                yield [batch_token_ids, batch_segment_ids], batch_labels
                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


# Load the pre-trained model
bert = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    model='albert',
    return_keras_model=False,
)

output = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.model.output)
output = Dense(units=num_classes,
               activation='sigmoid',
               kernel_initializer=bert.initializer)(output)
model = keras.models.Model(bert.model.input, output)
model.summary()

AdamLR = extend_with_piecewise_linear_lr(Adam, name='AdamLR')
model.compile(
    loss=focal_loss(
        gamma=1, alpha=0.9
    ),  # get_weight(weight_1=80,weight_0=20), 'binary_crossentropy'
Example #24
    """交叉熵作为loss,并mask掉输入部分。作用就是只计算目标位置的loss,忽略其他位置的loss。
    """
    def compute_loss(self, inputs, mask=None):
        y_true, y_pred = inputs # y_true:[batch_size, sequence_length]。应该是one-hot的表示,有一个地方为1,其他地方为0:[0,0,1,...0]
        y_mask = K.cast(K.not_equal(y_true, 0), K.floatx()) # y_mask是一个和y_true一致的shape. 1的值还为1.0,0的值还为0.0.即[0.0,0.0,1.0,...0.0]。
        # sparse_categorical_accuracy的例子。y_true = 2; y_pred = (0.02, 0.05, 0.83, 0.1); acc = sparse_categorical_accuracy(y_true, y_pred)
        accuracy = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        accuracy = K.sum(accuracy * y_mask) / K.sum(y_mask)
        self.add_metric(accuracy, name='accuracy')
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss

# Load the pre-trained model
model = build_transformer_model(
    config_path=config_path, checkpoint_path=checkpoint_path, with_mlm=True
)

# Model used for training
y_in = keras.layers.Input(shape=(None,))
outputs = CrossEntropy(1)([y_in, model.output])

train_model = keras.models.Model(model.inputs + [y_in], outputs)
train_model.compile(optimizer=Adam(8e-5))
train_model.summary()

# Convert the datasets
train_generator = data_generator(train_data, batch_size)
valid_generator = data_generator(valid_data, batch_size)
test_generator = data_generator(test_data, batch_size)
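# A training sketch under the assumption that data_generator subclasses bert4keras's
# DataGenerator (so it provides forfit() and __len__); epochs is an arbitrary choice.
train_model.fit(
    train_generator.forfit(),
    steps_per_epoch=len(train_generator),
    epochs=5,
)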
Example #25
        normB += b**2
    if normA == 0.0 or normB == 0.0:
        return None
    else:
        return dot_product / ((normA * normB)**0.5)


# config_path = '../config/bert/chinese_L-12_H-768_A-12/bert_config.json'
# checkpoint_path = '/root/kg/bert/chinese_L-12_H-768_A-12/bert_model.ckpt'
# dict_path = '/root/kg/bert/chinese_L-12_H-768_A-12/vocab.txt'
config_path = roberta_dir + '/bert_config.json'
checkpoint_path = roberta_dir + '/bert_model.ckpt'
dict_path = roberta_dir + '/vocab.txt'

tokenizer = Tokenizer(dict_path, do_lower_case=True)  # build the tokenizer
vec_model = build_transformer_model(config_path, checkpoint_path)  # build the model, load the weights


def toids(s):
    token_ids, segment_ids = tokenizer.encode(s)
    token_ids, segment_ids = to_array([token_ids], [segment_ids])
    return [token_ids, segment_ids]
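# Illustrative use of toids with vec_model above: take the [CLS] vector of two sentences
# and compare them by cosine similarity (assumptions: numpy imported as np, arbitrary
# example sentences).
vec_a = vec_model.predict(toids(u'今天天气不错'))[0][0]
vec_b = vec_model.predict(toids(u'今天天气很好'))[0][0]
sim = np.dot(vec_a, vec_b) / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))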


# Encoding test
# token_ids, segment_ids = tokenizer.encode(u'姚明的身高是多少')
# token_ids, segment_ids = to_array([token_ids], [segment_ids])
#
# print('\n ===== predicting =====\n')
# a = model.predict([token_ids, segment_ids])
    """分词前处理函数
    """
    return [
        w.replace(' ', u'\u2582').replace('\n', u'\u2583')
        for w in jieba.cut(text, cut_all=False)
    ]


tokenizer = SpTokenizer(spm_path,
                        token_start=None,
                        token_end=None,
                        pre_tokenize=pre_tokenize,
                        token_translate={u'\u2583': '<cls>'})  # build the tokenizer

model = build_transformer_model(config_path=config_path,
                                checkpoint_path=checkpoint_path,
                                model='gpt2')  # build the model, load the weights


class TextExpansion(AutoRegressiveDecoder):
    """基于随机采样的文本续写
    """
    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def predict(self, inputs, output_ids, states):
        token_ids = np.concatenate([inputs[0], output_ids], 1)
        return model.predict(token_ids)[:, -1]

    def generate(self, text, n=1, topp=0.95, temperature=1):
        token_ids, _ = tokenizer.encode(text)
        results = self.random_sample([token_ids],
                                     n,
Example #27
train_data = read_caption('/root/caption/coco/annotations/captions_train2014.json')
valid_data = read_caption('/root/caption/coco/annotations/captions_val2014.json')


# Image model
MobileNetV2 = keras.applications.mobilenet_v2.MobileNetV2
preprocess_input = keras.applications.mobilenet_v2.preprocess_input
image_model = MobileNetV2(include_top=False, pooling='avg')
img_size = 299

# BERT model
model = build_transformer_model(
    config_path,
    checkpoint_path,
    application='lm',
    keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens to slim down the vocabulary
    layer_norm_cond=image_model.output,
    layer_norm_cond_hidden_size=128,
    layer_norm_cond_hidden_act='swish',
    additional_input_layers=image_model.input,
)

model.summary()

# Cross entropy as the loss, masking out predictions for the input part
y_true = model.input[0][:, 1:]  # target tokens
y_mask = model.get_layer('Embedding-Token').output_mask[:, 1:]  # target mask
y_mask = K.cast(y_mask, K.floatx())  # cast to float
y_pred = model.output[:, :-1]  # predicted tokens, shifted one position from the targets
cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)
Example #28
                Y2 = sequence_padding(Y2)
                Y3 = sequence_padding(Y3)

                yield [batch_token_ids, batch_segment_ids], [Y1, Y2, Y3]
                batch_token_ids, batch_segment_ids, Y1, Y2, Y3 = [], [], [], [], []


# Additional inputs
# intent_labels = Input(shape=(intent_num,), name='intent_labels')
# domain_labels = Input(shape=(domain_num,), name='domain_labels')
# slot_labels = Input(shape=(None, slot_num), name='slot_labels')

# Build the network
electra_model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    model='electra',
    return_keras_model=False
)

classify_output = Lambda(lambda x: x[:, 0], name='CLS-token')(electra_model.model.output)

# Domain classification model
domain_output = Dense(domain_num, activation='softmax', kernel_initializer=electra_model.initializer,
                      name='domain_classifier')(classify_output)
domain_model = Model(electra_model.input, domain_output)

# Intent classification model
intent_output = Dense(intent_num, activation='softmax', kernel_initializer=electra_model.initializer,
                      name='intent_classifier')(classify_output)
intent_model = Model(electra_model.model.input, intent_output)
Example #29
    num_labels = len(label2id.keys()) * 2 + 1
    return id2label, label2id, num_labels


id2label, label2id, num_labels = get_id2label(
    label_path="../labels/bmes_train.rbtl.labels.json")
max_text_length = 128
batch_size = 16
bert_layers = 12
learing_rate = 1e-5  # the smaller bert_layers is, the larger the learning rate should be
crf_lr_multiplier = 1000  # enlarge the learning rate of the CRF layer when necessary

# Build the tokenizer
tokenizer = Tokenizer(rbtl_dict_path, do_lower_case=True)

model = build_transformer_model(rbtl_config_path)

output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labels)(output)
CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
output = CRF(output)

model = Model(model.input, output)
model.summary()

model.compile(loss=CRF.sparse_loss,
              optimizer=Adam(learing_rate),
              metrics=[CRF.sparse_accuracy])
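# Rough inference sketch for the tagger above (assumptions: greedy per-token argmax is used
# instead of a full Viterbi decode over the CRF transition matrix K.eval(CRF.trans), numpy
# is imported as np, id2label maps integer indices to tag strings, and the sentence is arbitrary).
token_ids, segment_ids = tokenizer.encode(u'患者三天前出现发热症状')
scores = model.predict([np.array([token_ids]), np.array([segment_ids])])[0]
pred_labels = [id2label[int(i)] for i in scores.argmax(axis=-1)]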

Example #30
from bert4keras.models import build_transformer_model
from bert4keras.tokenizers import Tokenizer
from bert4keras.snippets import AutoRegressiveDecoder

# NEZHA configuration
config_path = '/root/kg/bert/nezha_gpt_dialog/config.json'
checkpoint_path = '/root/kg/bert/nezha_gpt_dialog/model.ckpt'
dict_path = '/root/kg/bert/nezha_gpt_dialog/vocab.txt'

# Build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# Build and load the model
model = build_transformer_model(
    config_path,
    checkpoint_path,
    model='nezha',
    application='lm',
)
model.summary()


class ChatBot(AutoRegressiveDecoder):
    """基于随机采样对话机器人
    """
    @AutoRegressiveDecoder.wraps(default_rtype='probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        curr_segment_ids = np.ones_like(output_ids) - segment_ids[0, -1]
        segment_ids = np.concatenate([segment_ids, curr_segment_ids], 1)
        return model.predict([token_ids, segment_ids])[:, -1]