Example #1
# (the opening of this call was truncated in the snippet; a typical
#  bert4keras invocation is assumed -- return_keras_model=False so that
#  bert.initializer, referenced below, is available:)
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               return_keras_model=False,
                               )
output = Lambda(lambda x: x[:, 0])(bert.output)

y_in = Input(shape=(None,))

# scale_output = Dense(256, kernel_initializer=bert.initializer)(output)
# logits = Dense(num_classes)(output)
scl_output = SupervisedContrastiveLearning(alpha=0.05, T=0.05, output_idx=0)([output, y_in])

clf_output = Dense(num_classes, activation='softmax')(output)
clf_ce = CrossEntropy(output_idx=0, alpha=0.95)([clf_output, y_in])
model = Model(bert.inputs, clf_output)
model.summary()

train_model = Model(bert.inputs + [y_in], [scl_output, clf_ce])
train_model.compile(optimizer=Adam(lr))


if __name__ == '__main__':
    evaluator = Evaluator()
    train_model.fit_generator(train_generator.generator(),
                              steps_per_epoch=len(train_generator),
                              epochs=epochs,
                              callbacks=[evaluator])

    # tsne
    from sklearn.manifold import TSNE
    import matplotlib.pyplot as plt

    f = K.function(bert.inputs, output)
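
The snippet breaks off after building f; a minimal sketch of the t-SNE plot the
imports suggest (hypothetical glue code, assuming a valid_generator that yields
([token_ids, segment_ids], labels) batches like the later examples do):

import numpy as np

feats, ys = [], []
for (x, s), y in valid_generator:
    feats.append(f([x, s]))  # [CLS] features from the encoder
    ys.append(y[:, 0])
feats, ys = np.concatenate(feats), np.concatenate(ys)
emb = TSNE(n_components=2).fit_transform(feats)
plt.scatter(emb[:, 0], emb[:, 1], c=ys, s=4)
plt.savefig('tsne.png')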
Example #2
    train_model.fit(
        pretrain_generator.generator(),
        steps_per_epoch=len(pretrain_generator),
        epochs=pretrain_epochs,
        callbacks=[checkpoint, csv_logger],
    )

    # build task fine-tune model
    # reload weights without mlm
    # bert_without_mlm = build_transformer_model(checkpoint_path=model_saved_path,
    #                                            config_path=config_path, with_mlm=False)

    idx = 11
    feed_forward_name = 'Transformer-%d-FeedForward' % idx
    # bert4keras's Transformer object keeps a name -> layer dict in .layers;
    # with a plain keras Model this would be bert.get_layer(feed_forward_name)
    bert_without_mlm = bert.layers[feed_forward_name]
    output = Lambda(lambda x: x[:, 0])(bert_without_mlm.output)
    output = Dense(num_classes, activation='softmax')(output)

    model = Model(bert.inputs, output)
    model.summary()

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(fine_tune_lr),
                  metrics=['acc'])

    evaluator = Evaluator()
    model.fit_generator(train_generator.generator(),
                        steps_per_epoch=len(train_generator),
                        epochs=fine_tune_epochs,
                        callbacks=[evaluator])
Example #3
# (the start of this snippet was truncated; it builds the 3-layer successor.
#  A 12-layer predecessor, prefix='Predecessor-', is presumably built the same
#  way just above, as in Example #10:)
successor = build_transformer_model(config_path=config_path,
                                    checkpoint_path=checkpoint_path,
                                    return_keras_model=False,
                                    num_hidden_layers=3,
                                    prefix='Successor-')

# classifier model
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Dense(num_labels)(x_in)
CRF = ConditionalRandomField(lr_multiplier=2)
x = CRF(x)
classifier = Model(x_in, x)

opt = Adam(learning_rate=lr)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.outputs))
predecessor_model.compile(
    loss=predecessor_model.layers[-1].layers[-1].sparse_loss,
    optimizer=opt,
    metrics=[CRF.sparse_accuracy])

predecessor_model.summary()

successor_model = Model(successor.inputs, classifier(successor.outputs))
successor_model.compile(loss=successor_model.layers[-1].layers[-1].sparse_loss,
                        optimizer=opt,
                        metrics=[CRF.sparse_accuracy])
successor_model.summary()

theseus_model = bert_of_theseus(predecessor, successor, classifier)
theseus_model.compile(loss=theseus_model.layers[-1].layers[-1].sparse_loss,
                      optimizer=opt,
                      metrics=[CRF.sparse_accuracy])
theseus_model.summary()
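
bert_of_theseus is defined outside this snippet. For reference, a sketch of the
random module-swap gate it relies on, close to the BinaryRandomChoice layer in
the bert4keras theseus example (simplified, not the original code):

from keras.layers import Layer

class BinaryRandomChoice(Layer):
    """Forward a random choice between the predecessor's and the successor's
    module outputs at train time; always the successor's at inference time."""
    def call(self, inputs):
        source, target = inputs  # predecessor output, successor output
        mask = K.random_binomial(shape=[1], p=0.5)
        output = mask * target + (1 - mask) * source
        return K.in_train_phase(output, target)

    def compute_output_shape(self, input_shape):
        return input_shape[1]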
Example #4
        trans = K.eval(CRF.trans)
        wordseg.trans = trans
        print(trans)
        acc = evaluate(val_data)

        if acc > self.best_acc:
            self.best_acc = acc
            model.save_weights('./best_model.weights')
        print('acc is: {:.3f}, best acc is: {:.4f}'.format(acc, self.best_acc))

    def on_train_end(self, logs=None):
        model.load_weights('./best_model.weights')
        public_evaluate(test_path, test_result_path, test_score_path)


opt = extend_with_gradient_accumulation(Adam)
opt = opt(learning_rate=lr)
model.compile(loss=CRF.sparse_loss,
              optimizer=opt,
              metrics=[CRF.sparse_accuracy])

if __name__ == '__main__':
    evaluator = Evaluator()
    train_generator = data_generator(train_data, batch_size)
    model.fit_generator(train_generator.generator(),
                        steps_per_epoch=len(train_generator),
                        epochs=epochs,
                        callbacks=[evaluator])
else:
    model.load_weights('./best_model.weights')
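
wordseg is defined outside this snippet; the trans matrix copied into it above
is what a Viterbi decoder needs at inference time. A sketch of such a decoder,
essentially the viterbi_decode used across bert4keras's CRF examples
(hypothetical glue code here):

import numpy as np

def viterbi_decode(nodes, trans):
    """nodes: [seq_len, num_labels] emission scores from the model;
    trans: [num_labels, num_labels] learned CRF transition matrix."""
    labels = np.arange(nodes.shape[1]).reshape((1, -1))
    scores = nodes[0].reshape((-1, 1))  # best score of a path ending in each label
    paths = labels                      # best path ending in each label
    for t in range(1, len(nodes)):
        M = scores + trans + nodes[t].reshape((1, -1))
        idxs = M.argmax(0)              # best previous label for each current label
        scores = M.max(0).reshape((-1, 1))
        paths = np.concatenate([paths[:, idxs], labels], 0)
    return paths[:, scores[:, 0].argmax()]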
Example #5
model = build_transformer_model(config_path,
                                checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_tokens)
model.summary()

# train model
o_inputs = Input(shape=(None,))
train_model = Model(model.inputs + [o_inputs], model.outputs + [o_inputs])
y_true = train_model.inputs[2][:, 1:]    # target token ids, shifted left by one
y_mask = train_model.inputs[1][:, 1:]    # segment ids double as the loss mask:
                                         # only segment-1 (target) tokens count
y_pred = train_model.outputs[0][:, :-1]  # predictions, offset by one position
cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)
train_model.add_loss(cross_entropy)
train_model.compile(optimizer=Adam(1e-5))


class QuestionGenerator(AutoRegressiveDecoder):
    """seq2seq解码器
    """
    @AutoRegressiveDecoder.wraps('probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        segment_ids = np.concatenate(
            [segment_ids, np.ones_like(output_ids)], 1)
        ret = model.predict([token_ids, segment_ids])[:, -1]
        return ret

    def generate(self, context, answer, topk=2, random=False):
        # (body truncated in the original snippet; a plausible completion,
        #  mirroring other bert4keras seq2seq examples:)
        token_ids, segment_ids = tokenizer.encode(context, answer, maxlen=maxlen)
        if random:
            output_ids = self.random_sample([token_ids, segment_ids], 1, topk)[0]
        else:
            output_ids = self.beam_search([token_ids, segment_ids], topk)
        return tokenizer.decode(output_ids)
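
A hedged usage sketch (the constructor follows AutoRegressiveDecoder's
start_id/end_id/maxlen convention; maxlen=32 is an assumption, and some
bert4keras versions name the end-token attribute _token_sep_id):

question_generator = QuestionGenerator(start_id=None,
                                       end_id=tokenizer._token_end_id,
                                       maxlen=32)
print(question_generator.generate(u'some context passage', u'an answer span'))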
Example #6
bert = build_transformer_model(checkpoint_path=checkpoint_path,
                               config_path=config_path,
                               keep_tokens=keep_tokens,
                               dropout_rate=0.3,
                               )

label_inputs = Input(shape=(None,), name='label_inputs')

pooler = Lambda(lambda x: x[:, 0])(bert.output)
x = Dense(units=num_classes, activation='softmax', name='classifier')(pooler)
output = TotalLoss(4)(bert.inputs + [label_inputs, pooler, x])

model = Model(bert.inputs + [label_inputs], output)
classifier = Model(bert.inputs, x)

model.compile(optimizer=Adam(2e-5), metrics=['acc'])  # loss comes from the TotalLoss layer itself
model.summary()


def evaluate(val_data=valid_generator):
    total = 0.
    right = 0.
    for (x, s, y_true), _ in tqdm(val_data):
        y_pred = classifier.predict([x, s]).argmax(axis=-1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
    print(total, right)
    return right / total

Example #7
def sparse_accuracy(y_true, y_pred):
    # (opening lines truncated in this snippet; reconstructed to match the
    #  identical function in Example #15)
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    y_pred = K.cast(K.argmax(y_pred, axis=2), 'int32')
    return K.mean(K.cast(K.equal(y_true, y_pred), K.floatx()))


# optimizer
optimizer = extend_with_weight_decay(Adam)
optimizer = extend_with_gradient_accumulation(optimizer)
params = {
    'learning_rate': learning_rate,
    'weight_decay_rate': 1e-5,
    'exclude_from_weight_decay': ['norm', 'bias'],
    'grad_accum_steps': 4
}
optimizer = optimizer(**params)
model.compile(loss=sparse_categorical_crossentropy,
              optimizer=optimizer,
              metrics=[sparse_accuracy])
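
# note: with grad_accum_steps=4 the optimizer accumulates gradients over four
# consecutive batches before each weight update, so the effective batch size
# is four times whatever the data generator yields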


def extract_answer(question, context, max_a_len=32):
    """抽取答案函数
    """
    max_q_len = 64
    q_token_ids = tokenizer.encode(question, maxlen=max_q_len)[0]
    c_token_ids = tokenizer.encode(context,
                                   maxlen=maxlen - len(q_token_ids) + 1)[0]
    token_ids = q_token_ids + c_token_ids[1:]
    segment_ids = [0] * len(q_token_ids) + [1] * (len(c_token_ids) - 1)
    c_tokens = tokenizer.tokenize(context)[1:-1]
    mapping = tokenizer.rematch(context, c_tokens)
    token_ids = np.array([token_ids])  # under tf2.x this must be converted to np.array
Example #8
        print(u'val_acc: %.5f, best_val_acc: %.5f\n' %
              (val_acc, self.best_val_acc))


# load the pretrained model (3 layers)
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               return_keras_model=False,
                               num_hidden_layers=3,
                               prefix='Successor-')
x = Lambda(lambda x: x[:, 0])(bert.output)
x = Dense(units=num_classes, activation='softmax')(x)
model = Model(bert.inputs, x)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=AdaBelief(2e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
model.summary()

if __name__ == '__main__':
    # train
    evaluator = Evaluator('best_model.weights')
    model.fit_generator(train_generator.generator(),
                        steps_per_epoch=len(train_generator),
                        epochs=5,
                        callbacks=[evaluator])

else:
    model.load_weights('best_model.weights')
Example #9
# (the opening of this call was truncated; judging by the two outputs used
#  below -- a pooled vector and an LM head -- something like this is assumed:)
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               with_pool=True,
                               with_mlm=True,
                               application='unilm',
                               return_keras_model=False)
label_inputs = Input(shape=(None, ), name='label_inputs')

pooler = bert.model.outputs[0]
classification_output = Dense(units=num_classes,
                              activation='softmax',
                              name='classifier')(pooler)
classifier = Model(bert.model.inputs, classification_output)

seq2seq = Model(bert.model.inputs, bert.model.outputs[1])

outputs = TotalLoss([2])(bert.model.inputs + bert.model.outputs)
# outputs = Dense(num_classes, activation='softmax')(outputs)
train_model = Model(bert.model.inputs, [classification_output, outputs])
train_model.compile(loss=['sparse_categorical_crossentropy', None],
                    optimizer=Adam(1e-5),
                    metrics=['acc'])
train_model.summary()
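
# note: the None in the loss list gives the TotalLoss output no compile-side
# loss; that output presumably trains through the loss the layer adds internally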


def evaluate(val_data=valid_generator):
    total = 0.
    right = 0.
    for x, y_true in tqdm(val_data):
        y_pred = classifier.predict(x).argmax(axis=-1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
    print(total, right)
    return right / total
Example #10
# load the pretrained model (12 layers)
predecessor = build_transformer_model(config_path=config_path,
                                      checkpoint_path=checkpoint_path,
                                      return_keras_model=False,
                                      prefix='Predecessor-')

# classifier model
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Lambda(lambda x: x[:, 0])(x_in)
x = Dense(units=num_classes, activation='softmax')(x)
classifier = Model(x_in, x)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.output))
predecessor_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
predecessor_model.summary()

# predecessor_model_3
output = predecessor_model.layers[31].output  # output of the 3rd transformer block
output = Lambda(lambda x: x[:, 0])(output)
dense = ScaleDense(lr_multiplier=5,
                   units=num_classes,
                   activation='softmax',
                   weights=predecessor_model.layers[-1].get_weights())
output = dense(output)

predecessor_3_model = Model(predecessor_model.inputs, output)
predecessor_3_model.compile(
    # (arguments truncated in the snippet; presumably the same as
    #  predecessor_model.compile above:)
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),
    metrics=['sparse_categorical_accuracy'],
)
Example #11
# (the opening of this snippet was truncated; the loss class is reconstructed
#  to match the identical pattern in Example #5's training code:)
class CrossEntropy(Loss):
    """Cross entropy as a loss layer."""
    def compute_loss(self, inputs, mask=None):
        y_true, y_mask, y_pred = inputs
        y_true = y_true[:, 1:]   # target sequence, shifted by one position
        y_mask = y_mask[:, 1:]   # segment ids double as the loss mask
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


# build model
model = build_transformer_model(config_path,
                                checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_tokens)

output = CrossEntropy(2)(model.inputs + model.outputs)

model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))
model.summary()


class QuestionAnswerGenerator(AutoRegressiveDecoder):
    """seq2seq解码器
    """
    @AutoRegressiveDecoder.wraps('probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        segment_ids = np.concatenate(
            [segment_ids, np.ones_like(output_ids)], 1)
        ret = model.predict([token_ids, segment_ids])[:, -1]
        return ret
Example #12
        acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        acc = K.sum(acc * y_mask) / K.sum(y_mask)
        self.add_metric(acc, name='acc')
        return loss


model = build_transformer_model(config_path=config_path,
                                checkpoint_path=checkpoint_path,
                                with_mlm=True)

target_in = Input(shape=(None, ))
output = CrossEntropy(1)([target_in, model.output])

train_model = Model(model.inputs + [target_in], output)
train_model.compile(optimizer=Adam(1e-5))
train_model.summary()


def evaluate(data):
    label_ids = np.array([tokenizer.encode(l)[0][1:-1] for l in labels])
    #     print(label_ids)
    total, right = 0., 0.
    for x, _ in tqdm(data):
        x, y_true = x[:2], x[2]
        y_pred = model.predict(x)[:, mask_idx]
        y_pred = y_pred[:, 0, label_ids[:, 0]] * y_pred[:, 1, label_ids[:, 1]]
        y_pred = y_pred.argmax(axis=1)
        y_true = np.array(
            [labels.index(tokenizer.decode(y)) for y in y_true[:, mask_idx]])
        # (the snippet is truncated here; the usual tally follows:)
        total += len(y_true)
        right += (y_true == y_pred).sum()
    return right / total
Example #13
# (the opening of this call was truncated; a typical invocation is assumed:)
model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    with_mlm=True,
    # model='bert',  # to load bert/Roberta/ernie instead
    model='nezha')

target_in = Input(shape=(None, ))
output = CrossEntropy(1)([target_in, model.output])

train_model = Model(model.inputs + [target_in], output)

AdamW = extend_with_weight_decay(Adam)
AdamWG = extend_with_gradient_accumulation(AdamW)

opt = AdamWG(learning_rate=1e-5,
             exclude_from_weight_decay=['Norm', 'bias'],
             grad_accum_steps=4)
train_model.compile(opt)
train_model.summary()

label_ids = np.array([tokenizer.encode(l)[0][1:-1] for l in labels])


def predict(x):
    if len(x) == 3:
        x = x[:2]
    y_pred = model.predict(x)[:, mask_idx]
    y_pred = y_pred[:, 0, label_ids[:, 0]]
    y_pred = y_pred.argmax(axis=1)
    return y_pred
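
# note: unlike Example #12's evaluate(), which multiplies the probabilities of
# two label tokens, this predict() distinguishes labels by their first token alone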


def evaluate(data):
Example #14
# create opt before build model
opt = Adam(lr)
opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(opt)  # enable mixed precision
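# (this experimental TF API wraps the optimizer with automatic loss scaling and
#  rewrites the graph so that eligible ops run in float16)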

# build model
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               num_hidden_layers=num_hidden_layers)
output = Lambda(lambda x: x[:, 0])(bert.output)
output = Dense(num_classes, activation='softmax')(output)

model = Model(bert.inputs, output)
model.summary()

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['acc'])


def evaluate(data):
    total, right = 0., 0.
    for x_true, y_true in tqdm(data):
        y_pred = model.predict(x_true).argmax(axis=1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()

    return right / total


class Evaluator(keras.callbacks.Callback):
    # (body truncated in the original snippet; a typical body, mirroring the
    #  callbacks in the other examples, tracks and saves the best weights:)
    def __init__(self):
        self.best_acc = 0.

    def on_epoch_end(self, epoch, logs=None):
        acc = evaluate(valid_generator)  # valid_generator: assumed dev-set generator
        if acc > self.best_acc:
            self.best_acc = acc
            model.save_weights('best_model.weights')
        print(u'val_acc: %.5f, best_val_acc: %.5f\n' % (acc, self.best_acc))
Example #15
def sparse_categorical_crossentropy(y_true, y_pred):
    # (opening lines truncated; y_true is reshaped and cast as in
    #  sparse_accuracy below)
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    y_true = K.one_hot(y_true, K.shape(y_pred)[2])
    # compute the cross entropy
    return K.mean(K.categorical_crossentropy(y_true, y_pred))


def sparse_accuracy(y_true, y_pred):
    # y_true's shape and dtype need to be re-asserted explicitly
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    # compute the accuracy
    y_pred = K.cast(K.argmax(y_pred, axis=2), 'int32')
    return K.mean(K.cast(K.equal(y_true, y_pred), K.floatx()))


model.compile(loss=sparse_categorical_crossentropy,
              optimizer=Adam(learning_rate),
              metrics=[sparse_accuracy])


def extract_answer(question, context, max_a_len=16):
    """抽取答案函数
    """
    max_q_len = 48
    q_token_ids = tokenizer.encode(question, maxlen=max_q_len)[0]
    c_token_ids = tokenizer.encode(context,
                                   maxlen=maxlen - len(q_token_ids) + 1)[0]
    token_ids = q_token_ids + c_token_ids[1:]
    segment_ids = [0] * len(q_token_ids) + [1] * (len(c_token_ids) - 1)
    c_tokens = tokenizer.tokenize(context)[1:-1]
    mapping = tokenizer.rematch(context, c_tokens)
    probas = model.predict([[token_ids], [segment_ids]])[0]