def build_ner_albert(args):
    bert_model = build_transformer_model(
        config_path=args.config_path,
        checkpoint_path=args.checkpoint_path,
        model='albert',
        # return_keras_model=False,
    )

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])
    #x = Lambda(lambda x: x[:, 0])(x)

    p = Dense(args.nclass, activation='softmax', name="p")(x)

    model = Model([x1_in, x2_in], p)

    model.compile(
        #loss=multi_category_focal_loss2(gamma=2., alpha=.25),
        loss='categorical_crossentropy',
        optimizer=Adam(args.lr),
        #metrics=["accuracy"]
    )

    model.summary()
    return model
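
# The import preamble was dropped from these snippets; a hedged reconstruction,
# matching the identifiers used below (bert4keras for the transformer builders,
# keras-bert for load_trained_model_from_checkpoint, plain Keras for the rest):
import os, json
import tensorflow as tf
from bert4keras.models import build_transformer_model, build_bert_model
from bert4keras.tokenizers import Tokenizer, load_vocab
from bert4keras.layers import ConditionalRandomField, LayerNormalization, Loss
from bert4keras.backend import keras, K
from bert4keras.optimizers import Adam, extend_with_piecewise_linear_lr
from keras.layers import (Input, Dense, Lambda, Dropout, Add, Concatenate,
                          Embedding, Reshape, LSTM, Bidirectional,
                          TimeDistributed)
from keras.models import Model
from keras_bert import load_trained_model_from_checkpoint
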
def build_ner_bert(args, training=False):
    bert_model = load_trained_model_from_checkpoint(
        args.config_path,
        args.checkpoint_path,
        seq_len=None,
        training=training)  # load the pretrained model

    for l in bert_model.layers:
        #if "-12-" in l.name or "-11-" in l.name or "-10-" in l.name:  # freeze certain encoder blocks while fine-tuning
        l.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])
    #x = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(12))(x)  # directly extract the output of a chosen encoder block (1-12)

    p = Dense(args.nclass, activation='softmax', name="p")(x)

    model = Model([x1_in, x2_in], p)

    model.compile(
        #loss=multi_category_focal_loss2(gamma=2., alpha=.25),
        loss='categorical_crossentropy',
        optimizer=Adam(args.lr),
        #metrics=["accuracy"]
    )

    model.summary()
    return model
def build_mrc_bert(args, training=False):
    bert_model = load_trained_model_from_checkpoint(
        args.config_path,
        args.checkpoint_path,
        seq_len=None,
        training=training)  # load the pretrained model

    for l in bert_model.layers:
        #if "-12-" in l.name or "-11-" in l.name or "-10-" in l.name:  # freeze certain encoder blocks while fine-tuning
        l.trainable = True

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])

    #x = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(12))(x)  # directly extract the output of a chosen encoder block (1-12)

    p_start = Dense(1, activation='sigmoid', name="p_start")(x)
    p_end = Dense(1, activation='sigmoid', name="p_end")(x)

    model = Model([x1_in, x2_in], [p_start, p_end])

    model.compile(
        loss=focal_loss(gamma=2., alpha=.25),
        #loss='binary_crossentropy',
        optimizer=Adam(args.lr),  # use a sufficiently small learning rate
        #loss_weights=[1., 1.]
        #metrics=['accuracy']
    )

    model.summary()
    return model
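
# focal_loss is referenced above but never defined in these snippets. A
# minimal sketch, assuming the standard binary focal loss (Lin et al., 2017)
# applied elementwise to the sigmoid start/end outputs:
from keras import backend as K

def focal_loss(gamma=2., alpha=.25):
    def binary_focal_loss(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())  # avoid log(0)
        pos = -alpha * y_true * K.pow(1. - y_pred, gamma) * K.log(y_pred)
        neg = -(1. - alpha) * (1. - y_true) * K.pow(y_pred, gamma) * K.log(1. - y_pred)
        return K.mean(pos + neg)
    return binary_focal_loss
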
def build_cls_bert(args):
    bert_model = load_trained_model_from_checkpoint(
        args.config_path, args.checkpoint_path, seq_len=None,
        use_adapter=True)  # load the pretrained model

    for l in bert_model.layers:
        #if "-12-" in l.name or "-11-" in l.name or "-10-" in l.name: # freeze certrain encoder blocks while finetuning
        l.trainable = True  # False to freeze parameters in this encoder layer

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])
    '''
    a1 = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(12))(x)  # extract output from the last encoder layer
    a2 = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(11))(x)
    x = Add()([a1, a2])
    x = Lambda(lambda x: x[:, 0])(x)  # extract the [CLS] tensor for downstream tasks.
    x = bert_model.get_layer('NSP-Dense').output
    '''
    p = Dense(args.nclass, activation='softmax')(x)

    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='categorical_crossentropy',
        #loss = focal_loss(gamma=2., alpha=.25),
        #loss=multi_category_focal_loss2(gamma=2., alpha=.25),
        #optimizer=AdamLR(learning_rate=1e-4, lr_schedule={1000: 1,2000: 0.1}),
        optimizer=Adam(args.lr),
        metrics=['accuracy', f1])
    model.summary()
    return model
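
# The f1 metric compiled above is likewise undefined here. A common Keras
# sketch (batch-level micro F1 over the one-hot labels), an assumption rather
# than the author's exact helper:
from keras import backend as K

def f1(y_true, y_pred):
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))   # true positives
    possible = K.sum(K.round(K.clip(y_true, 0, 1)))      # actual positives
    predicted = K.sum(K.round(K.clip(y_pred, 0, 1)))     # predicted positives
    precision = tp / (predicted + K.epsilon())
    recall = tp / (possible + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())
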
Example #5
    def build_model():
        bert = build_transformer_model(
            config_path,
            checkpoint_path,
            return_keras_model=False,
        )

        output = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.model.output)
        output = Dense(
            units=len(label2id) if args.task == 'category' else 1,
            activation='softmax' if args.task == 'category' else 'sigmoid',
            kernel_initializer=bert.initializer)(output)

        model = keras.models.Model(bert.model.input, output)
        model.summary()

        # AdamLR = extend_with_piecewise_linear_lr(Adam, name='AdamLR')

        model.compile(
            loss='sparse_categorical_crossentropy'
            if args.task == 'category' else 'binary_crossentropy',
            optimizer=Adam(learning_rate),  # use a sufficiently small learning rate
            # optimizer=AdamLR(learning_rate=1e-4, lr_schedule={
            #     1000: 1,
            #     2000: 0.1
            # }),
            metrics=['accuracy'],
        )
        return model
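
# The commented-out AdamLR above is bert4keras's piecewise-linear-lr wrapper;
# a sketch of how it is built (linear warmup to the full lr by step 1000,
# then decay to 10% of it by step 2000):
from bert4keras.optimizers import Adam, extend_with_piecewise_linear_lr

AdamLR = extend_with_piecewise_linear_lr(Adam, name='AdamLR')
optimizer = AdamLR(learning_rate=1e-4, lr_schedule={1000: 1, 2000: 0.1})
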
Example #6
def build_mrc_albert(args):
    bert_model = build_transformer_model(
        config_path=args.config_path,
        checkpoint_path=args.checkpoint_path,
        model='albert',
        # return_keras_model=False,
    )
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))

    x = bert_model([x1_in, x2_in])

    #x = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(12))(x)
    #x = Lambda(lambda x: x, output_shape=lambda s:s)(x)

    p_start = Dense(1, activation='sigmoid', name="p_start")(x)
    p_end = Dense(1, activation='sigmoid', name="p_end")(x)

    model = Model([x1_in, x2_in], [p_start, p_end])

    model.compile(
        loss=focal_loss(gamma=2., alpha=.25),
        #loss='binary_crossentropy',
        optimizer=Adam(args.lr),
        #loss_weights=[1., 1.]
        #metrics=['accuracy']
    )

    model.summary()
    return model
Example #7
 def compile_model(self):
     self.model_.compile(
         # self.model.compile(
         loss=self.CRF.sparse_loss,
         optimizer=Adam(self.learning_rate),
         metrics=[self.CRF.sparse_accuracy])
     logger.info('compile model done')
Example #8
def get_sentiment_model():
    global model

    class CrossEntropy(Loss):
        """交叉熵作为loss,并mask掉padding部分
        """
        def compute_loss(self, inputs, mask=None):
            y_true, y_pred = inputs
            if mask[1] is None:
                y_mask = 1.0
            else:
                y_mask = K.cast(mask[1], K.floatx())[:, 1:]
            y_true = y_true[:, 1:]  # target token_ids
            y_pred = y_pred[:, :-1]  # predicted sequence, shifted one position
            accuracy = keras.metrics.sparse_categorical_accuracy(
                y_true, y_pred)
            accuracy = K.sum(accuracy * y_mask) / K.sum(y_mask)
            self.add_metric(accuracy, name='accuracy')
            loss = K.sparse_categorical_crossentropy(y_true, y_pred)
            loss = K.sum(loss * y_mask) / K.sum(y_mask)
            return loss

    output = CrossEntropy(1)([model.input, model.output])

    model = keras.models.Model(model.input, output)
    model.compile(optimizer=Adam(6e-4))
    model.summary()

    return model
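
# get_sentiment_model() rebinds a pre-existing global `model`. A hedged sketch
# of how that base model is typically built in bert4keras seq2seq examples
# (config_path/checkpoint_path assumed defined, as elsewhere in this file):
model = build_transformer_model(
    config_path,
    checkpoint_path,
    application='unilm',  # seq2seq attention mask; 'lm' is the other common choice
)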
Example #9
    def buildmodel(self):
        self.token_dict, self.keep_tokens = load_vocab(
            dict_path=self.dict_path,
            simplified=True,
            startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]'],
        )
        self.tokenizer = Tokenizer(self.token_dict, do_lower_case=True)

        if self.pretrain_type == 'albert':
            model = build_transformer_model(
                config_path,
                checkpoint_path,
                model='albert',
                with_mlm=True,
                keep_tokens=self.keep_tokens,
            )
        elif self.pretrain_type == 'bert':
            model = build_transformer_model(
                config_path,
                checkpoint_path,
                model='bert',
                with_mlm=True,
                keep_tokens=self.keep_tokens,
            )
        output = Lambda(lambda x: x[:, 1:self.max_a_len + 1])(model.output)
        #print(output.shape)
        self.model = Model(model.input, output)
        self.model.compile(loss=self.masked_cross_entropy,
                           optimizer=Adam(self.lr))
        self.model.summary()
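
# self.masked_cross_entropy (and the masked_crossentropy in Example #12) is
# not shown. A minimal free-function sketch, assuming sparse cross-entropy
# averaged over non-[PAD] answer positions (pad id 0):
from keras import backend as K

def masked_cross_entropy(y_true, y_pred):
    y_true = K.cast(y_true, K.floatx())
    y_mask = K.cast(K.greater(y_true, 0), K.floatx())  # 1 where not [PAD]
    loss = K.sparse_categorical_crossentropy(y_true, y_pred)
    return K.sum(loss * y_mask) / (K.sum(y_mask) + K.epsilon())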
Example #10
def build_model():
    """
    Build the model body.
    :return: the model object
    """
    with SESS.as_default():
        with SESS.graph.as_default():
            # build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                checkpoint_path=bert_config.checkpoint_path,
                return_keras_model=False,
                model=bert_config.model_type)

            # l is a layer name inside the model (a str)
            for l in bert_model.layers:
                bert_model.model.get_layer(l).trainable = True
            # take the [CLS] vector for classification
            t = Lambda(lambda x: x[:, 0])(bert_model.model.output)
            t = Dropout(cameo_train_config.drop_out_rate)(t)
            # predict the event CAMEO code
            cameo_out_put = Dense(len(ID2LABEL), activation='softmax')(t)
            # the CAMEO model body
            cameo_model = Model(bert_model.model.inputs, cameo_out_put)

            cameo_model.compile(loss='sparse_categorical_crossentropy',
                                optimizer=Adam(
                                    cameo_train_config.learning_rate),
                                metrics=['accuracy'])

            cameo_model.summary()

    return cameo_model
Example #11
def build_model():
    """
    Build the model structure and return the model object.
    :return: model
    """
    # build the BERT model
    bert_model = build_transformer_model(
        config_path=bert_config.config_path,
        checkpoint_path=bert_config.checkpoint_path,
        model=bert_config.model_type,
        return_keras_model=False)
    # l is a layer name inside the model (a str)
    for l in bert_model.layers:
        bert_model.model.get_layer(l).trainable = True

    # build the model body
    t = Lambda(lambda x: x[:, 0])(bert_model.model.output)  # take the [CLS] vector for classification
    t = Dropout(match_train_config.drop_out_rate)(t)
    # model prediction output
    output = Dense(units=2, activation='softmax')(t)

    model = Model(bert_model.model.inputs, output)
    model.summary()

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(match_train_config.learning_rate),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )

    return model
Example #12
def get_model(tokens, keep_tokens):
    model = build_transformer_model(
        config_path=BaseConfig.config_path,
        checkpoint_path=BaseConfig.checkpoint_path,
        with_mlm=True,
        model="nezha",
        keep_tokens=[0, 100, 101, 102, 103, 100, 100] +
        keep_tokens[:len(tokens)])
    model.compile(loss=masked_crossentropy, optimizer=Adam(2e-5))
    model.summary()
    return model
Example #13
def build_model():
    bert_model = build_transformer_model(
        config_path=Config.config_path,
        checkpoint_path=Config.checkpoint_path,
        return_keras_model=False)

    # additional inputs
    subject_labels = Input(shape=(None, 2))
    subject_ids = Input(shape=(2, ))
    object_labels = Input(shape=(None, len(predicate2id), 2))

    # predict the subject
    output = Dense(units=2,
                   activation='sigmoid',
                   kernel_initializer=bert_model.initializer)(
                       bert_model.model.output)

    subject_preds = Lambda(lambda x: x**2)(output)

    subject_model = Model(bert_model.inputs, subject_preds)
    # feed in the subject and predict the object
    output = bert_model.model.layers[-2].get_output_at(-1)
    subject = Lambda(extrac_subject)([output, subject_ids])
    output = LayerNormalization(conditional=True)([output, subject])
    output = Dense(units=len(predicate2id) * 2,
                   activation='sigmoid',
                   kernel_initializer=bert_model.initializer)(output)
    output = Lambda(lambda x: x**4)(output)
    object_preds = Reshape((-1, len(predicate2id), 2))(output)

    object_model = Model(bert_model.model.inputs + [subject_ids], object_preds)

    # the training model
    train_model = Model(
        bert_model.model.inputs + [subject_labels, subject_ids, object_labels],
        [subject_preds, object_preds])

    mask = bert_model.model.get_layer('Embedding-Token').output_mask
    mask = K.cast(mask, K.floatx())

    subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
    subject_loss = K.mean(subject_loss, 2)
    subject_loss = K.sum(subject_loss * mask) / K.sum(mask)

    object_loss = K.binary_crossentropy(object_labels, object_preds)
    object_loss = K.sum(K.mean(object_loss, 3), 2)
    object_loss = K.sum(object_loss * mask) / K.sum(mask)

    train_model.add_loss(subject_loss + object_loss)

    optimizer = Adam(Config.learning_rate)

    train_model.compile(optimizer=optimizer)
    return train_model, subject_model, object_model
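
# extrac_subject (sic) above is undefined in the snippet; a sketch following
# the bert4keras relation-extraction example, using its batch_gather helper:
from bert4keras.backend import K, batch_gather

def extrac_subject(inputs):
    """Concatenate the encoder vectors at the subject's start and end positions."""
    output, subject_ids = inputs
    start = batch_gather(output, subject_ids[:, :1])
    end = batch_gather(output, subject_ids[:, 1:])
    return K.concatenate([start, end], 2)[:, 0]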
Example #14
def build_model(mode='bert', filename='bert', lastfour=False, LR=1e-5, DR=0.2):
    if filename == 'bert':
        path = './chinese_L-12_H-768_A-12/'
    elif filename == 'ernie':
        path = './chinese_L-12_H-768_A-12/'
    elif filename == 'roberta':
        path = './chinese_L-12_H-768_A-12/'
    config_path = path + 'bert_config.json'
    checkpoint_path = path + 'bert_model.ckpt'
    dict_path = path + 'vocab.txt'

    global tokenizer
    tokenizer = Tokenizer(dict_path, do_lower_case=True)

    bert = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        with_pool=True,
        model=mode,
        return_keras_model=False,
    )
    if lastfour:
        model = Model(inputs=bert.model.input,
                      outputs=[
                          bert.model.layers[-3].get_output_at(0),
                          bert.model.layers[-11].get_output_at(0),
                          bert.model.layers[-19].get_output_at(0),
                          bert.model.layers[-27].get_output_at(0),
                      ])
        output = model.outputs
        output1 = Lambda(lambda x: x[:, 0], name='Pooler1')(output[0])
        output2 = Lambda(lambda x: x[:, 0], name='Pooler2')(output[1])
        output3 = Lambda(lambda x: x[:, 0], name='Pooler3')(output[2])
        output4 = Lambda(lambda x: x[:, 0], name='Pooler4')(output[3])

        output = Concatenate(axis=1)([output1, output2, output3, output4])

    else:
        output = bert.model.output

    output = Dropout(rate=DR)(output)
    output = Dense(units=2,
                   activation='softmax',
                   kernel_initializer=bert.initializer)(output)

    model = Model(bert.model.input, output)

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(LR),
        metrics=['accuracy'],
    )
    return model
Example #15
def build_model():
    model = build_transformer_model(
        config.config_path,
        config.checkpoint_path,
        application='unilm',
        keep_tokens=keep_tokens  # keep only the tokens in keep_tokens, shrinking the original vocab
    )

    output = CrossEntropy(2)(model.inputs + model.outputs)

    model = Model(model.inputs, output)
    model.compile(optimizer=Adam(1e-5))
    return model
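
# The CrossEntropy(2) layer is not defined in this snippet; in the bert4keras
# seq2seq examples it is a Loss subclass along these lines (output_axis=2
# marks which entry of the inputs list is the model output):
from bert4keras.backend import K
from bert4keras.layers import Loss

class CrossEntropy(Loss):
    """Cross-entropy computed only where segment_ids == 1 (the target part)."""
    def compute_loss(self, inputs, mask=None):
        y_true, y_mask, y_pred = inputs
        y_true = y_true[:, 1:]   # target token_ids
        y_mask = y_mask[:, 1:]   # segment_ids mark the span to predict
        y_pred = y_pred[:, :-1]  # predictions, shifted one step
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        return K.sum(loss * y_mask) / K.sum(y_mask)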
Example #16
def GeneratePretrain(c_e, g_pre_lr):
    c_in = Input(shape=(1, ))
    c = Embedding(2, c_e)(c_in)
    c = Reshape((c_e, ))(c)  # flatten to the condition embedding size
    model = build_transformer_model(
        config_path,
        checkpoint_path,
        application='lm',
        keep_tokens=keep_tokens,
        layer_norm_cond=c,
        additional_input_layers=c_in,
    )
    output = CrossEntropy(1)([model.inputs[0], model.outputs[0]])
    model = Model(model.inputs, output)
    model.compile(optimizer=Adam(g_pre_lr))
    return model
Example #17
 def build_model(self):
     c_in = Input(shape=(1, ))
     c = Embedding(2, self.c_e)(c_in)
     c = Reshape((self.c_e, ))(c)
     model = build_transformer_model(
         config_path=config_path,
         checkpoint_path=checkpoint_path,
         application='lm',
         keep_tokens=keep_tokens,
         layer_norm_cond=c,
         additional_input_layers=c_in,
     )
     output = model.outputs[0]
     model = Model(model.inputs, output)
     model.compile(optimizer=Adam(self.g_lr), loss=self.loss)
     return model
Example #18
def build_crf_adversarial_bert(num_labels, model_name='electra'):
    model = build_transformer_model(config_path,
                                    checkpoint_path,
                                    model=model_name)
    for layer in model.layers:
        layer.trainable = True
    output = Dense(num_labels)(model.output)
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    output = CRF(output)

    model = Model(model.input, output)

    model.compile(loss=CRF.sparse_loss,
                  optimizer=Adam(learning_rate),
                  metrics=[CRF.sparse_accuracy])

    return model, CRF
Example #19
    def build_model(self):
        model = build_transformer_model(
            self.config_path,
            self.checkpoint_path,
            model='electra'
        )
        output_layer = 'Transformer-%s-FeedForward-Norm' % (12 - 1)
        output = model.get_layer(output_layer).output
        output = Dense(11)(output)
        self.CRF = ConditionalRandomField(lr_multiplier=100)
        output = self.CRF(output)

        model = Model(model.input, output)
        model.summary()
        model.compile(loss=self.CRF.sparse_loss,
                      optimizer=Adam(1e-4),
                      metrics=[self.CRF.sparse_accuracy]
                      )
        return model
Example #20
def build_bert(num_labels):
    model = build_transformer_model(config_path,
                                    checkpoint_path)  # ,model = 'electra')
    for layer in model.layers:
        layer.trainable = True
    # bilstm = Bidirectional(GRU(200, return_sequences=True))(model.output)
    #     bilstm = SpatialDropout1D(0.5)(bilstm)
    output = Dense(num_labels)(model.output)
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    output = CRF(output)

    model = Model(model.input, output)
    # model.summary()

    # model = multi_gpu_model(model, gpus= 2)
    model.compile(loss=CRF.sparse_loss,
                  optimizer=Adam(learning_rate),
                  metrics=[CRF.sparse_accuracy])
    return model, CRF
Example #21
    def get_model(self):
        pretrained_bert = build_transformer_model(
            config.bert_config_path,
            config.bert_checkpoint_path,
        )

        pretrained_bert.trainable = True
        set_trainable = False
        for layer in pretrained_bert.layers:
            if (layer.name.startswith('Transformer-10')
                    or layer.name.startswith('Transformer-11')
                    or layer.name.startswith('Transformer-9')):
                set_trainable = True
            if set_trainable:
                layer.trainable = True
            else:
                layer.trainable = False

        last_layer1 = 'Transformer-%s-FeedForward-Norm' % (config.bert_layers - 1)
        output_layer1 = pretrained_bert.get_layer(last_layer1).output
        last_layer2 = 'Transformer-%s-FeedForward-Norm' % (config.bert_layers - 2)
        output_layer2 = pretrained_bert.get_layer(last_layer2).output
        last_layer3 = 'Transformer-%s-FeedForward-Norm' % (config.bert_layers - 3)
        output_layer3 = pretrained_bert.get_layer(last_layer3).output
        output = keras.layers.add(
            [output_layer1, output_layer2, output_layer3])

        output = Bidirectional(LSTM(128, return_sequences=True))(output)
        output = Dense(config.num_labels)(output)  # 27-way classification

        output = self.CRF(output)

        model = Model(pretrained_bert.input, output)

        model.compile(loss=self.CRF.sparse_loss,
                      optimizer=Adam(config.learning_rate),
                      metrics=[self.CRF.sparse_accuracy])

        return model
Example #22
 def build_model(self):
     """
     Build the model: load the pretrained BERT model and fine-tune its last few layers.
     :return:
     """
     bert_model = build_transformer_model(config_path=args.BERT_CONFIG,
                                          checkpoint_path=args.BERT_MODEL)
     output = bert_model.get_layer(args.BERT_LAYER).output
     output = Dropout(rate=0.5)(output)
     output = Dense(_labels_num)(output)
     CRF = ConditionalRandomField(lr_multiplier=1)
     p = CRF(output)
     model = Model(bert_model.input, p)
     model.compile(
         loss=CRF.sparse_loss,
         optimizer=Adam(lr=1e-5),
         metrics=[CRF.sparse_accuracy]
     )
     model.summary()
     return model
Example #23
def train():
    # first dimension: number of sentences in the training samples;
    # second dimension: the (entity, label) pairs each sentence contains
    train_data = loader.load_data('./round1_train/data/train.txt')
    valid_data = loader.load_data('./round1_train/data/val.txt')

    global train_generator
    train_generator = generator.Generator(train_data=train_data,
                                          batch_size=batch_size,
                                          tokenizer=tokenizer,
                                          maxlen=maxlen,
                                          label2id=loader.label2id)

    global model
    model = build_transformer_model(
        config_path,
        checkpoint_path,
    )  # build the transformer model from bert_model.ckpt and bert_config.json

    output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
    output = model.get_layer(output_layer).output  # shape=(None, None, 768)
    output = Dense(loader.num_labels)(output)  # 27-way: 13 classes * (B + I) + O

    output = CRF(output)

    model = Model(model.input, output)
    model.summary()

    model.compile(loss=CRF.sparse_loss,
                  optimizer=Adam(learing_rate),
                  metrics=[CRF.sparse_accuracy])

    NER = models.NamedEntityRecognizer(trans=K.eval(CRF.trans),
                                       starts=[0],
                                       ends=[0])
    evaluate = evaluator.Evaluator(valid_data, tokenizer, model, NER, CRF,
                                   loader)

    model.fit_generator(train_generator.forfit(),
                        steps_per_epoch=len(train_generator),
                        epochs=epochs,
                        callbacks=[evaluate])
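
# models.NamedEntityRecognizer above is not shown; in the bert4keras NER
# example it is a ViterbiDecoder over the CRF transition matrix. A trimmed
# sketch (tokenizer/model are the globals this snippet already uses; the real
# class also groups the decoded label ids back into entity spans):
from bert4keras.snippets import ViterbiDecoder, to_array

class NamedEntityRecognizer(ViterbiDecoder):
    def recognize(self, text):
        tokens = tokenizer.tokenize(text, maxlen=512)
        token_ids = tokenizer.tokens_to_ids(tokens)
        segment_ids = [0] * len(token_ids)
        token_ids, segment_ids = to_array([token_ids], [segment_ids])
        nodes = model.predict([token_ids, segment_ids])[0]
        return self.decode(nodes)  # best label-id path under the CRF transitions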
Example #24
def bertmodel():
    model = build_transformer_model(
        config_path,
        checkpoint_path,
    )
    output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
    output = model.get_layer(output_layer).output
    output = Dense(num_labels)(output)  # 27-way classification

    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    output = CRF(output)

    model = Model(model.input, output)
    #     model.summary()

    model.compile(
        loss=CRF.sparse_loss,
        optimizer=Adam(learing_rate),
        metrics=[CRF.sparse_accuracy]
    )
    return model, CRF
Example #25
def build_model(embeddings=100, vocab_size=vocab_size, rnn_units=100):
    x_in = Input(shape=(None, ))
    output = Embedding(input_dim=vocab_size, output_dim=embeddings,
                       trainable=True, mask_zero=True)(x_in)

    # forward LSTM + CRF head for the segmentation labels
    flstm_output = LSTM(units=rnn_units, return_sequences=True)(output)
    seq_output = Dropout(0.5)(flstm_output)
    seq_output = TimeDistributed(Dense(num_seglabels), name='dense_seq')(seq_output)

    Seq_crf = ConditionalRandomField(lr_multiplier=seq_crf_lr_multiplier, name='seq_crf')
    seq_output = Seq_crf(seq_output)

    # backward pass: reverse, run an LSTM, reverse back, concat with forward
    reverse_output = Lambda(lambda x: K.reverse(x, axes=1))(output)
    reverse_output = LSTM(units=rnn_units, return_sequences=True)(reverse_output)
    blstm_output = Lambda(lambda x: K.reverse(x, axes=1))(reverse_output)

    lstm_out = Concatenate()([flstm_output, blstm_output])
    tag_output = Dropout(0.5)(lstm_out)
    tag_output = TimeDistributed(Dense(num_labels), name='dense_tag')(tag_output)

    Tag_crf = ConditionalRandomField(lr_multiplier=tag_crf_lr_multiplier, name='tag_crf')
    tag_output = Tag_crf(tag_output)

    model = Model(x_in, [seq_output, tag_output])
    model.summary()

    model.compile(
        loss=[Seq_crf.sparse_loss, Tag_crf.sparse_loss],
        optimizer=Adam(learing_rate),
        metrics=[SparseAccuracy()]
    )
    return model, Seq_crf, Tag_crf
Example #26
def train(train_param, model_save_path):
    #    logger.info()
    train_data, valid_data, schema_dict = load_data()
    train_param['schema_dict'] = schema_dict

    #    print(train_param)

    # build the tokenizer
    tokenizer = Tokenizer(train_param['dict_path'], do_lower_case=True)
    trainmodel = TagModel(train_param)

    trainmodel.model.compile(loss=trainmodel.CRF.sparse_loss,
                             optimizer=Adam(train_param['learing_rate']),
                             metrics=[trainmodel.CRF.sparse_accuracy])

    train_generator = data_generator(train_data, train_param['batch_size'],
                                     tokenizer, schema_dict['label2id'],
                                     train_param['maxlen'])

    trainmodel.model.fit_generator(
        train_generator.forfit(),
        steps_per_epoch=len(train_generator),
        epochs=train_param['epochs'],
    )
    savemodel_name = os.path.join(model_save_path, 'best_model.weights')
    trainmodel.model.save_weights(savemodel_name)

    params_file = os.path.join(model_save_path, 'config.json')
    with open(params_file, 'w', encoding='utf-8') as json_file:
        json.dump(train_param, json_file, indent=4, ensure_ascii=False)

    NER = NamedEntityRecognizer(K.eval(trainmodel.CRF.trans),
                                trainmodel.model,
                                tokenizer,
                                schema_dict['id2label'],
                                starts=[0],
                                ends=[0])
    eval_result = evaluate(valid_data, NER)

    return eval_result
Example #27
    def build_model():
        bert = build_transformer_model(
            config_path,
            checkpoint_path,
            return_keras_model=False,
        )

        output = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.model.output)
        output = Dense(units=2,
                       activation='softmax',
                       kernel_initializer=bert.initializer)(output)

        model = keras.models.Model(bert.model.input, output)
        model.summary()

        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=Adam(learning_rate),  # use a sufficiently small learning rate
            metrics=['accuracy'],
        )

        return model
Example #28
def make_model(config_path, checkpoint_path, prefix):

    if prefix == 'BERT' or prefix == 'roberta-large':
        bert = build_bert_model(
            config_path=config_path,
            checkpoint_path=checkpoint_path,
            with_pool=True,
            return_keras_model=False,
        )
    elif prefix == 'NEZHA':
        bert = build_bert_model(
            config_path=config_path,
            checkpoint_path=checkpoint_path,
            model='nezha',
            with_pool=True,
            return_keras_model=False,
        )

    output = Dropout(rate=0.01)(bert.model.output)
    ## with adversarial training added, this can be expected to be more stable
    #output = Lambda(lambda x: x[:, 0])(bert.model.output)

    output = Dense(units=2,
                   activation='softmax',
                   kernel_initializer=bert.initializer)(output)

    model = keras.models.Model(bert.model.input, output)
    # model.summary()

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(args.lr),
        metrics=['accuracy'],
    )
    # once the helper is written, enabling adversarial training takes a single line
    adversarial_training(model, 'Embedding-Token', args.alpha)

    return model
Example #29
def build_model():
    with tf.device("/gpu:1"):
        model = build_transformer_model(
            config_path,
            checkpoint_path,
        )
    # output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers-1)

    with tf.device("/gpu:0"):
        output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
        output = model.get_layer(output_layer).output
        # output = Bidirectional(LSTM(unit, return_sequences=True))(output)
        output = MyDense(num_labels)(output)
        CRF = MyConditionalRandomField(lr_multiplier=crf_lr_multiplier)
        output = CRF(output)

        model = Model(model.input, output)
        # model = multi_gpu_model(model,2)
        model.summary()
        model.compile(loss=CRF.sparse_loss,
                      optimizer=Adam(learing_rate),
                      metrics=[CRF.sparse_accuracy])

    return model, CRF
Example #30
def build_model():
    """构建模型。"""
    model = build_transformer_model(
        config_path,
        checkpoint_path,
        model='nezha',
        application='unilm',
        keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens, shrinking the original vocab
    )

    o_in = Input(shape=(None, ))
    train_model = Model(model.inputs + [o_in], model.outputs + [o_in])

    # cross-entropy as the loss, masking out predictions over the input part
    y_true = train_model.input[2][:, 1:]  # target tokens
    y_mask = train_model.input[1][:, 1:]
    y_pred = train_model.output[0][:, :-1]  # predicted tokens, offset one position from the targets
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
    cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)

    train_model.add_loss(cross_entropy)
    train_model.compile(optimizer=Adam(1e-5))

    return model, train_model
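
# Hedged usage sketch: the loss was attached with add_loss, so fitting passes
# only the three inputs [token_ids, segment_ids, target_ids] and no labels
# (train_generator/epochs assumed defined as in the other examples):
train_model.fit_generator(train_generator.forfit(),
                          steps_per_epoch=len(train_generator),
                          epochs=epochs)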