Example #1
# (build call reconstructed; the head of this snippet was truncated)
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               return_keras_model=False)
output = Lambda(lambda x: x[:, 0])(bert.output)

y_in = Input(shape=(None,))

# scale_output = Dense(256, kernel_initializer=bert.initializer)(output)
# logits = Dense(num_classes)(output)
scl_output = SupervisedContrastiveLearning(alpha=0.05, T=0.05, output_idx=0)([output, y_in])

clf_output = Dense(num_classes, activation='softmax')(output)
clf_ce = CrossEntropy(output_idx=0, alpha=0.95)([clf_output, y_in])
model = Model(bert.inputs, clf_output)
model.summary()

train_model = Model(bert.inputs + [y_in], [scl_output, clf_ce])
train_model.compile(optimizer=Adam(lr))
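# note: compile() receives no loss -- the SupervisedContrastiveLearning and
# CrossEntropy layers above presumably register their alpha-weighted terms
# themselves (e.g. via add_loss)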


if __name__ == '__main__':
    evaluator = Evaluator()
    train_model.fit_generator(train_generator.generator(),
                              steps_per_epoch=len(train_generator),
                              epochs=epochs,
                              callbacks=[evaluator])

    # tsne
    from sklearn.manifold import TSNE
    import matplotlib.pyplot as plt

    f = K.function(bert.inputs, [output])  # CLS feature extractor (K.function expects a list of outputs)
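    # A minimal sketch of the truncated visualization step, assuming a
    # valid_generator that yields ([token_ids, segment_ids], labels) batches
    # like the other generators in this document (names are illustrative):
    import numpy as np

    feats, labels = [], []
    for x, y in valid_generator:
        feats.append(f(x)[0])
        labels.append(y[:, 0])
    emb = TSNE(n_components=2).fit_transform(np.concatenate(feats))
    plt.scatter(emb[:, 0], emb[:, 1], c=np.concatenate(labels), s=2)
    plt.savefig('tsne.png')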
Example #2
teacher = build_transformer_model(config_path=config_path,
                                  checkpoint_path=checkpoint_path,
                                  return_keras_model=False,
                                  num_hidden_layers=num_hidden_layers,
                                  model='bert')

# classifier head
x_in = Input(shape=K.int_shape(teacher.output)[1:])
x = Lambda(lambda x: x[:, 0])(x_in)
x = Dense(units=num_classes, activation='softmax')(x)
classifier = Model(x_in, x)

teacher_model = Model(teacher.inputs, classifier(teacher.output))
teacher_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(2e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)

teacher_model.summary()
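# (FastBERT workflow: the full 12-layer teacher is fine-tuned first; the
#  small per-layer classifiers below are then trained by self-distillation
#  against the teacher's predictions)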


class FastbertClassifierLayer(Layer):
    """FastBert 中用来做分类的层,为了增加分类层的性能,同时参数不能太大,所以作者选择了一个hidden size
    更小的transformer
    """
    def __init__(self,
                 labels_num,
                 hidden_size=128,
                 head_nums=2,
                 head_size=64,
                 **kwargs):
        # (remainder of __init__ truncated in the original; minimal completion)
        super(FastbertClassifierLayer, self).__init__(**kwargs)
        self.labels_num = labels_num
        self.hidden_size = hidden_size
        self.head_nums = head_nums
        self.head_size = head_size
Example #3
model = build_transformer_model(config_path,
                                checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_tokens)
model.summary()

# train model
o_inputs = Input(shape=(None, ))
train_model = Model(model.inputs + [o_inputs], model.outputs + [o_inputs])
y_true = train_model.inputs[2][:, 1:]    # target token_ids, shifted left by one
y_mask = train_model.inputs[1][:, 1:]    # segment_ids mark the positions to predict
y_pred = train_model.outputs[0][:, :-1]  # predictions, aligned with y_true
cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)
train_model.add_loss(cross_entropy)
train_model.compile(Adam(1e-5))


class QuestionGenerator(AutoRegressiveDecoder):
    """seq2seq解码器
    """
    @AutoRegressiveDecoder.wraps('probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        segment_ids = np.concatenate(
            [segment_ids, np.ones_like(output_ids)], 1)
        ret = model.predict([token_ids, segment_ids])[:, -1]
        return ret

    def generate(self, context, answer, topk=2, random=False):
        # (body truncated in the original snippet; a minimal completion in
        #  the usual bert4keras style -- tokenizer/maxlen assumed from context)
        token_ids, segment_ids = tokenizer.encode(context, answer, maxlen=maxlen)
        if random:
            output_ids = self.random_sample([token_ids, segment_ids], 1, topk)[0]
        else:
            output_ids = self.beam_search([token_ids, segment_ids], topk)
        return tokenizer.decode(output_ids)
Example #4
# load the pre-trained model (3 layers)
successor = build_transformer_model(config_path=config_path,
                                    checkpoint_path=checkpoint_path,
                                    return_keras_model=False,
                                    num_hidden_layers=3,
                                    prefix='Successor-')

# classifier head
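# (the 12-layer `predecessor` model used below is built the same way as
#  `successor` above, with prefix='Predecessor-'; its build call was
#  truncated from this snippet)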
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Dense(num_labels)(x_in)
CRF = ConditionalRandomField(lr_multiplier=2)
x = CRF(x)
classifier = Model(x_in, x)

opt = Adam(learning_rate=lr)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.outputs))
predecessor_model.compile(
    loss=predecessor_model.layers[-1].layers[-1].sparse_loss,  # sparse_loss of the CRF layer nested inside classifier
    optimizer=opt,
    metrics=[CRF.sparse_accuracy])

predecessor_model.summary()

successor_model = Model(successor.inputs, classifier(successor.outputs))
successor_model.compile(loss=successor_model.layers[-1].layers[-1].sparse_loss,
                        optimizer=opt,
                        metrics=[CRF.sparse_accuracy])
successor_model.summary()
Example #5
# (build call reconstructed; the head of this snippet was truncated --
#  with_pool / with_mlm / application are inferred from how the two outputs
#  are used below, and should be treated as assumptions)
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               with_pool='linear',
                               with_mlm=True,
                               application='unilm',
                               return_keras_model=False)
label_inputs = Input(shape=(None, ), name='label_inputs')

pooler = bert.model.outputs[0]
classification_output = Dense(units=num_classes,
                              activation='softmax',
                              name='classifier')(pooler)
classifier = Model(bert.model.inputs, classification_output)

seq2seq = Model(bert.model.inputs, bert.model.outputs[1])

outputs = TotalLoss([2])(bert.model.inputs + bert.model.outputs)
# outputs = Dense(num_classes, activation='softmax')(outputs)
train_model = Model(bert.model.inputs, [classification_output, outputs])
train_model.compile(loss=['sparse_categorical_crossentropy', None],
                    optimizer=Adam(1e-5),
                    metrics=['acc'])
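# (loss=None for the second output: the TotalLoss layer above presumably
#  injects the seq2seq term via add_loss, so only the classification head
#  needs an external loss)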
train_model.summary()


def evaluate(val_data=valid_generator):
    total = 0.
    right = 0.
    for x, y_true in tqdm(val_data):
        y_pred = classifier.predict(x).argmax(axis=-1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
    print(total, right)
    return right / total
Example #6
# load the pre-trained model (12 layers)
predecessor = build_transformer_model(config_path=config_path,
                                      checkpoint_path=checkpoint_path,
                                      return_keras_model=False,
                                      prefix='Predecessor-')

# classifier head
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Lambda(lambda x: x[:, 0])(x_in)
x = Dense(units=num_classes, activation='softmax')(x)
classifier = Model(x_in, x)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.output))
predecessor_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
predecessor_model.summary()

# predecessor_model_3
output = predecessor_model.layers[31].output  # output of the 3rd Transformer block
output = Lambda(lambda x: x[:, 0])(output)
dense = ScaleDense(lr_multiplier=5,
                   units=num_classes,
                   activation='softmax',
                   weights=predecessor_model.layers[-1].get_weights())
output = dense(output)
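# (this early-exit head reuses the final classifier's weights and trains
#  them faster via lr_multiplier=5)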

predecessor_3_model = Model(predecessor_model.inputs, output)
predecessor_3_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),
    metrics=['sparse_categorical_accuracy'],
)  # (compile arguments reconstructed; the original snippet was truncated here)
Example #7
train_generator = data_generator(data=train_data, batch_size=batch_size)
val_generator = data_generator(valid_data, batch_size)

# build model
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               num_hidden_layers=num_hidden_layers)
output = Lambda(lambda x: x[:, 0])(bert.output)
output = Dense(num_classes, activation='softmax')(output)

model = Model(bert.inputs, output)
model.summary()

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(lr),
              metrics=['acc'])


def evaluate(data):
    total, right = 0., 0.
    for x_true, y_true in tqdm(data):
        y_pred = model.predict(x_true).argmax(axis=1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()

    return right / total


class Evaluator(keras.callbacks.Callback):
    """(body truncated in the original snippet; see the complete Evaluator
    in Example #10 below)"""
Example #8
# teacher model (12 layers)
teacher = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    return_keras_model=True,
    num_hidden_layers=12,
    prefix='Teacher-'
)
output = Lambda(lambda x: x[:, 0])(teacher.output)
logits = Dense(num_classes)(output)
soften = Activation(activation='softmax')(logits)
teacher_logits = Model(teacher.inputs, logits)
teacher_soften = Model(teacher.inputs, soften)
teacher_soften.compile(loss='categorical_crossentropy', optimizer=Adam(2e-5), metrics=['acc'])
teacher_soften.summary()
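
# A hedged sketch of how the teacher's logits could be precomputed for the
# distillation data consumed below (`train_data` as (text, label) pairs,
# plus `tokenizer`/`maxlen`, are assumed from the surrounding context):
def add_teacher_logits(data):
    out = []
    for text, label in data:
        token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
        logits = teacher_logits.predict([[token_ids], [segment_ids]])[0]
        out.append((text, label, logits))
    return out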


class StudentDataGenerator(DataGenerator):
    """数据生成器
    """

    def __iter__(self, shuffle=False):
        batch_token_ids, batch_segment_ids, batch_labels, batch_logits = [], [], [], []
        for is_end, (text, label, logits) in self.get_sample(shuffle):
            token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)

            batch_labels.append(label)
            batch_logits.append(logits)
            # (batching / yield tail truncated in the original snippet)
Example #9
# teacher model (12 layers)
teacher = build_transformer_model(config_path=config_path,
                                  checkpoint_path=checkpoint_path,
                                  return_keras_model=True,
                                  num_hidden_layers=12,
                                  prefix='Teacher-')
output = Lambda(lambda x: x[:, 0])(teacher.output)
logits = Dense(num_classes)(output)
soften = Activation(activation='softmax')(logits)
teacher_logits = Model(teacher.inputs, logits)
teacher_soften = Model(teacher.inputs, soften)
teacher_soften.compile(loss='categorical_crossentropy',
                       optimizer=Adam(2e-5),
                       metrics=['acc'])
teacher_soften.summary()


class StudentDataGenerator(DataGenerator):
    """数据生成器
    """
    def __iter__(self):
        batch_token_ids, batch_segment_ids, batch_labels, batch_logits = [], [], [], []
        for is_end, (text, label, logits) in self.get_sample():
            token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)

            batch_labels.append(label)
            batch_logits.append(logits)
            # (batching / yield tail truncated in the original snippet)
Example #10
def evaluate(data, model):
    # (function head reconstructed; the original snippet was truncated)
    total, right = 0., 0.
    for x_true, y_true in tqdm(data):
        y_pred = model.predict(x_true).argmax(axis=1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
    return right / total


class Evaluator(keras.callbacks.Callback):
    def __init__(self, savename):
        self.best_val_acc = 0.
        self.savename = savename

    def on_epoch_end(self, epoch, logs=None):
        val_acc = evaluate(valid_generator, self.model)
        if val_acc > self.best_val_acc:
            self.best_val_acc = val_acc
            self.model.save_weights(self.savename)
        print(
            u'val_acc: %.5f, best_val_acc: %.5f\n' %
            (val_acc, self.best_val_acc)
        )


if __name__ == '__main__':
    evaluator = Evaluator('best_clf.weights')
    model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(1e-5), metrics=['acc'])
    model.fit_generator(train_sim_generator.generator(),
                        steps_per_epoch=len(train_sim_generator) * 2,
                        epochs=5,
                        callbacks=[evaluator]
                        )
else:
    model.load_weights('best_clf.weights')
Example #11
class CrossEntropy(Loss):
    """Cross entropy as the seq2seq loss, masking out the input part.
    (class head reconstructed; the original snippet was truncated)
    """
    def compute_loss(self, inputs, mask=None):
        y_true, y_mask, y_pred = inputs
        y_true = y_true[:, 1:]   # target token_ids, shifted left by one
        y_mask = y_mask[:, 1:]   # segment_ids mark the positions to predict
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


# build model
model = build_transformer_model(config_path,
                                checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_tokens)

output = CrossEntropy(2)(model.inputs + model.outputs)

model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))
model.summary()


class QuestionAnswerGenerator(AutoRegressiveDecoder):
    """seq2seq解码器
    """
    @AutoRegressiveDecoder.wraps('probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        segment_ids = np.concatenate(
            [segment_ids, np.ones_like(output_ids)], 1)
        ret = model.predict([token_ids, segment_ids])[:, -1]
        return ret
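
# Hedged usage sketch (this snippet's generate() wrapper was truncated; the
# pattern mirrors QuestionGenerator above; `passage` is illustrative and the
# end-token id attribute name is assumed from bert4keras):
qag = QuestionAnswerGenerator(start_id=None,
                              end_id=tokenizer._token_end_id,
                              maxlen=64)
token_ids, segment_ids = tokenizer.encode(passage, maxlen=maxlen)
output_ids = qag.beam_search([token_ids, segment_ids], topk=2)
print(tokenizer.decode(output_ids))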
Example #12
class data_generator(DataGenerator):  # (head reconstructed; snippet truncated)
    def __iter__(self, shuffle=False):
        batch_token_ids, batch_segment_ids, batch_labels = [], [], []
        for is_end, (text, label) in self.get_sample(shuffle):
            token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
            batch_token_ids.append(token_ids)
            batch_segment_ids.append(segment_ids)
            batch_labels.append([label])
            if len(batch_token_ids) == self.batch_size or is_end:
                batch_token_ids = pad_sequences(batch_token_ids)
                batch_segment_ids = pad_sequences(batch_segment_ids)
                batch_labels = pad_sequences(batch_labels)

                yield [batch_token_ids, batch_segment_ids], batch_labels

                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


train_generator = data_generator(data=train_data, batch_size=batch_size)
val_generator = data_generator(valid_data, batch_size)

# create the optimizer before building the model, so the mixed-precision
# graph rewrite can wrap it first
opt = Adam(lr)
opt = tf.train.experimental.enable_mixed_precision_graph_rewrite(opt)  # enable mixed precision

# build model
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               num_hidden_layers=num_hidden_layers)
output = Lambda(lambda x: x[:, 0])(bert.output)
output = Dense(num_classes, activation='softmax')(output)

model = Model(bert.inputs, output)
model.summary()

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=opt,
              metrics=['acc'])
Example #13
def sparse_categorical_crossentropy(y_true, y_pred):
    # (function head reconstructed; the original snippet was truncated)
    # y_true's shape and dtype need to be re-asserted
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    y_true = K.one_hot(y_true, K.shape(y_pred)[2])
    # compute the cross entropy
    return K.mean(K.categorical_crossentropy(y_true, y_pred))


def sparse_accuracy(y_true, y_pred):
    # y_true's shape and dtype need to be re-asserted
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    # compute the accuracy
    y_pred = K.cast(K.argmax(y_pred, axis=2), 'int32')
    return K.mean(K.cast(K.equal(y_true, y_pred), K.floatx()))


model.compile(loss=sparse_categorical_crossentropy,
              optimizer=Adam(learning_rate),
              metrics=[sparse_accuracy])


def extract_answer(question, context, max_a_len=16):
    """抽取答案函数
    """
    max_q_len = 48
    q_token_ids = tokenizer.encode(question, maxlen=max_q_len)[0]
    c_token_ids = tokenizer.encode(context,
                                   maxlen=maxlen - len(q_token_ids) + 1)[0]
    token_ids = q_token_ids + c_token_ids[1:]  # concatenate question + context, dropping the context's duplicate [CLS]
    segment_ids = [0] * len(q_token_ids) + [1] * (len(c_token_ids) - 1)
    c_tokens = tokenizer.tokenize(context)[1:-1]
    mapping = tokenizer.rematch(context, c_tokens)
    probas = model.predict([[token_ids], [segment_ids]])[0]