def bert_of_theseus(predecessor, successor, classifier):
    """bert of theseus:固定住 predecessor 和 classifier,随机替换, predecessor中的block为successor对应层来训练successor
    """
    inputs = predecessor.inputs
    # Freeze the already-trained layers
    for layer in predecessor.model.layers:
        layer.trainable = False
    classifier.trainable = False
    # Embedding-layer replacement
    predecessor_outputs = predecessor.apply_embeddings(inputs)
    successor_outputs = successor.apply_embeddings(inputs)
    outputs = ProportionalAdd()([predecessor_outputs, successor_outputs])
    # Transformer-layer replacement
    layers_per_module = predecessor.num_hidden_layers // successor.num_hidden_layers
    for index in range(successor.num_hidden_layers):
        predecessor_outputs = outputs
        for sub_index in range(layers_per_module):
            predecessor_outputs = predecessor.apply_attention_layers(
                predecessor_outputs, layers_per_module * index + sub_index)
        successor_outputs = successor.apply_attention_layers(outputs, index)
        outputs = ProportionalAdd()([predecessor_outputs, successor_outputs])
    # Return the assembled model
    outputs = classifier(outputs)
    model = Model(inputs, outputs)
    return model
def bert_of_theseus(predecessor, successor, classifier):
    """bert of theseus
    """
    inputs = predecessor.inputs
    # Freeze the already-trained predecessor and classifier
    for layer in predecessor.model.layers:
        layer.trainable = False
    classifier.trainable = False
    # Embedding-layer replacement
    # (the embeddings could also be kept frozen)
    predecessor_outputs = predecessor.apply_embeddings(inputs)
    successor_outputs = successor.apply_embeddings(inputs)
    outputs = ProportionalAdd()([predecessor_outputs, successor_outputs])
    # Transformer-layer replacement: each successor layer stands in for the corresponding predecessor module
    layers_per_module = predecessor.num_hidden_layers // successor.num_hidden_layers
    for index in range(successor.num_hidden_layers):
        predecessor_outputs = outputs
        for sub_index in range(layers_per_module):
            predecessor_outputs = predecessor.apply_transformer_layers(
                predecessor_outputs, layers_per_module * index + sub_index)
        successor_outputs = successor.apply_transformer_layers(outputs, index)
        outputs = ProportionalAdd()([predecessor_outputs, successor_outputs])
    # Return the assembled model
    outputs = classifier(outputs)
    model = Model(inputs, outputs)
    return model
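# ProportionalAdd is not defined in these snippets; below is a hypothetical
# minimal sketch of such a stochastic-replacement layer, modelled on the
# BinaryRandomChoice layer from the original BERT-of-Theseus example. The real
# layer used above may well differ.
from bert4keras.backend import keras, K


class ProportionalAdd(keras.layers.Layer):
    """Randomly picks between [predecessor_output, successor_output] while training.

    With probability `rate` the successor output is passed on, otherwise the
    predecessor output; at inference time only the successor path is kept.
    """
    def __init__(self, rate=0.5, **kwargs):
        super(ProportionalAdd, self).__init__(**kwargs)
        self.supports_masking = True
        self.rate = rate

    def compute_mask(self, inputs, mask=None):
        if mask is not None:
            return mask[1]
        return mask

    def call(self, inputs):
        source, target = inputs  # predecessor output, successor output
        # Bernoulli switch, broadcast over (batch, seq_len, hidden)
        switch = K.random_binomial(shape=[1], p=self.rate)
        output = switch * target + (1 - switch) * source
        return K.in_train_phase(output, target)

    def compute_output_shape(self, input_shape):
        return input_shape[1]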
# Load the pretrained model (12 layers)
predecessor = build_transformer_model(config_path=config_path,
                                      checkpoint_path=checkpoint_path,
                                      return_keras_model=False,
                                      prefix='Predecessor-')

# Load the pretrained model (3 layers)
successor = build_transformer_model(config_path=config_path,
                                    checkpoint_path=checkpoint_path,
                                    return_keras_model=False,
                                    num_hidden_layers=3,
                                    prefix='Successor-')

# Classifier model
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Dense(num_labels)(x_in)
CRF = ConditionalRandomField(lr_multiplier=2)
x = CRF(x)
classifier = Model(x_in, x)

opt = Adam(learning_rate=lr)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.outputs))
predecessor_model.compile(
    loss=predecessor_model.layers[-1].layers[-1].sparse_loss,
    optimizer=opt,
    metrics=[CRF.sparse_accuracy])

predecessor_model.summary()

successor_model = Model(successor.inputs, classifier(successor.outputs))
successor_model.compile(loss=successor_model.layers[-1].layers[-1].sparse_loss,
                        optimizer=opt,
                        metrics=[CRF.sparse_accuracy])
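
# Sketch only (not part of the original snippet): the pieces above can be wired
# together with the bert_of_theseus() helper defined at the top of this page,
# reusing the same CRF sparse loss as the compile calls above.
theseus_model = bert_of_theseus(predecessor, successor, classifier)
theseus_model.compile(loss=CRF.sparse_loss,
                      optimizer=opt,
                      metrics=[CRF.sparse_accuracy])
theseus_model.summary()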
        return loss


bert = build_transformer_model(checkpoint_path=checkpoint_path,
                               config_path=config_path,
                               keep_tokens=keep_tokens,
                               dropout_rate=0.3,
                               )

label_inputs = Input(shape=(None,), name='label_inputs')

pooler = Lambda(lambda x: x[:, 0])(bert.output)
x = Dense(units=num_classes, activation='softmax', name='classifier')(pooler)
output = TotalLoss(4)(bert.inputs + [label_inputs, pooler, x])

model = Model(bert.inputs + [label_inputs], output)
classifier = Model(bert.inputs, x)

model.compile(optimizer=Adam(2e-5), metrics=['acc'])
model.summary()


def evaluate(val_data=valid_generator):
    total = 0.
    right = 0.
    for (x, s, y_true), _ in tqdm(val_data):
        y_pred = classifier.predict([x, s]).argmax(axis=-1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
    print(total, right)
Example #5
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


# build model
model = build_transformer_model(config_path,
                                checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_tokens)
model.summary()

# train model
o_inputs = Input(shape=(None, ))
train_model = Model(model.inputs + [o_inputs], model.outputs + [o_inputs])
y_true = train_model.inputs[2][:, 1:]
y_mask = train_model.inputs[1][:, 1:]
y_pred = train_model.outputs[0][:, :-1]
cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)
train_model.add_loss(cross_entropy)
train_model.compile(Adam(1e-5))


class QuestionGenerator(AutoRegressiveDecoder):
    """seq2seq解码器
    """
    @AutoRegressiveDecoder.wraps('probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        if val_acc > self.best_val_acc:
            self.best_val_acc = val_acc
            self.model.save_weights(self.savename)
        print(u'val_acc: %.5f, best_val_acc: %.5f\n' %
              (val_acc, self.best_val_acc))


# Load the pretrained model (3 layers)
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               return_keras_model=False,
                               num_hidden_layers=3,
                               prefix='Successor-')
x = Lambda(lambda x: x[:, 0])(bert.output)
x = Dense(units=num_classes, activation='softmax')(x)
model = Model(bert.inputs, x)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=AdaBelief(2e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
model.summary()

if __name__ == '__main__':
    # Training
    evaluator = Evaluator('best_model.weights')
    model.fit_generator(train_generator.generator(),
                        steps_per_epoch=len(train_generator),
                        epochs=5,
                        callbacks=[evaluator])
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


model = build_transformer_model(
    config_path=config_path,
    checkpoint_path=checkpoint_path,
    with_mlm=True,
    # model='bert',  # load BERT / RoBERTa / ERNIE
    model='nezha')

target_in = Input(shape=(None, ))
output = CrossEntropy(1)([target_in, model.output])

train_model = Model(model.inputs + [target_in], output)

AdamW = extend_with_weight_decay(Adam)
AdamWG = extend_with_gradient_accumulation(AdamW)

opt = AdamWG(learning_rate=1e-5,
             exclude_from_weight_decay=['Norm', 'bias'],
             grad_accum_steps=4)
train_model.compile(opt)
train_model.summary()

label_ids = np.array([tokenizer.encode(l)[0][1:-1] for l in labels])


def predict(x):
    if len(x) == 3:
Example #8
    train_model.fit(
        pretrain_generator.generator(),
        steps_per_epoch=len(pretrain_generator),
        epochs=pretrain_epochs,
        callbacks=[checkpoint, csv_logger],
    )

    # build task fine-tune model
    # reload weights without mlm
    # bert_without_mlm = build_transformer_model(checkpoint_path=model_saved_path,
    #                                            config_path=config_path, with_mlm=False)

    idx = 11
    feed_forward_name = 'Transformer-%d-FeedForward' % idx
    bert_without_mlm = bert.layers[feed_forward_name]
    output = Lambda(lambda x: x[:, 0])(bert_without_mlm.output)
    output = Dense(num_classes, activation='softmax')(output)

    model = Model(bert.inputs, output)
    model.summary()

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=Adam(fine_tune_lr),
                  metrics=['acc'])

    evaluator = Evaluator()
    model.fit_generator(train_generator.generator(),
                        steps_per_epoch=len(train_generator),
                        epochs=fine_tune_epochs,
                        callbacks=[evaluator])
Example #9
def build_transformer_model_with_mlm():
    """带mlm的bert模型
    """
    bert = build_transformer_model(
        config_path,
        with_mlm='linear',
        #         with_nsp=True,
        model='bert',
        return_keras_model=False,
        #         keep_tokens=keep_tokens
    )
    proba = bert.model.output
    #     print(proba)
    # Auxiliary inputs
    token_ids = Input(shape=(None, ), dtype='int64', name='token_ids')  # target token ids
    is_masked = Input(shape=(None, ), dtype=K.floatx(),
                      name='is_masked')  # mask indicator

    #     nsp_label = Input(shape=(None, ), dtype='int64', name='nsp')  # nsp

    def mlm_loss(inputs):
        """计算loss的函数,需要封装为一个层
        """
        y_true, y_pred, mask = inputs
        #         _, y_pred = y_pred
        loss = K.sparse_categorical_crossentropy(y_true,
                                                 y_pred,
                                                 from_logits=True)
        loss = K.sum(loss * mask) / (K.sum(mask) + K.epsilon())
        return loss

    def nsp_loss(inputs):
        """计算nsp loss的函数,需要封装为一个层
        """
        y_true, y_pred = inputs
        #         y_pred, _ = y_pred
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.mean(loss)
        return loss

    def mlm_acc(inputs):
        """计算准确率的函数,需要封装为一个层
        """
        y_true, y_pred, mask = inputs
        #         _, y_pred = y_pred
        y_true = K.cast(y_true, K.floatx())
        acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        acc = K.sum(acc * mask) / (K.sum(mask) + K.epsilon())
        return acc

    def nsp_acc(inputs):
        """计算准确率的函数,需要封装为一个层
        """
        y_true, y_pred = inputs
        y_pred, _ = y_pred
        y_true = K.cast(y_true, K.floatx())
        acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        acc = K.mean(acc)
        return acc

    mlm_loss = Lambda(mlm_loss, name='mlm_loss')([token_ids, proba, is_masked])
    mlm_acc = Lambda(mlm_acc, name='mlm_acc')([token_ids, proba, is_masked])
    #     nsp_loss = Lambda(nsp_loss, name='nsp_loss')([nsp_label, proba])
    #     nsp_acc = Lambda(nsp_acc, name='nsp_acc')([nsp_label, proba])

    train_model = Model(bert.model.inputs + [token_ids, is_masked],
                        [mlm_loss, mlm_acc])

    loss = {
        'mlm_loss': lambda y_true, y_pred: y_pred,
        'mlm_acc': lambda y_true, y_pred: K.stop_gradient(y_pred),
        #         'nsp_loss': lambda y_true, y_pred: y_pred,
        #         'nsp_acc': lambda y_true, y_pred: K.stop_gradient(y_pred),
    }

    return bert, train_model, loss
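
# Hypothetical usage of the returned objects (not part of the original snippet;
# the optimizer choice is an assumption). The loss dict is keyed by the names of
# the 'mlm_loss' / 'mlm_acc' output layers, so it can be passed straight to compile.
bert, train_model, loss = build_transformer_model_with_mlm()
train_model.compile(loss=loss, optimizer=Adam(1e-5))
train_model.summary()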
            K.shape(y_true)[0], K.floatx())


bert = build_transformer_model(checkpoint_path=checkpoint_path,
                               config_path=config_path,
                               with_pool='linear',
                               application='unilm',
                               keep_tokens=keep_tokens,
                               return_keras_model=False)
label_inputs = Input(shape=(None, ), name='label_inputs')

pooler = bert.model.outputs[0]
classification_output = Dense(units=num_classes,
                              activation='softmax',
                              name='classifier')(pooler)
classifier = Model(bert.model.inputs, classification_output)

seq2seq = Model(bert.model.inputs, bert.model.outputs[1])

outputs = TotalLoss([2])(bert.model.inputs + bert.model.outputs)
# outputs = Dense(num_classes, activation='softmax')(outputs)
train_model = Model(bert.model.inputs, [classification_output, outputs])
train_model.compile(loss=['sparse_categorical_crossentropy', None],
                    optimizer=Adam(1e-5),
                    metrics=['acc'])
train_model.summary()


def evaluate(val_data=valid_generator):
    total = 0.
    right = 0.
Example #11
# Load the pretrained model (12 layers)
predecessor = build_transformer_model(config_path=config_path,
                                      checkpoint_path=checkpoint_path,
                                      return_keras_model=False,
                                      prefix='Predecessor-')

# Load the pretrained model (3 layers)
successor = build_transformer_model(config_path=config_path,
                                    checkpoint_path=checkpoint_path,
                                    return_keras_model=False,
                                    num_hidden_layers=3,
                                    prefix='Successor-')

# Classifier model
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Lambda(lambda x: x[:, 0])(x_in)
x = Dense(units=num_classes, activation='softmax')(x)
classifier = Model(x_in, x)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.output))
predecessor_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(2e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
predecessor_model.summary()

successor_model = Model(successor.inputs, classifier(successor.output))
successor_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(2e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
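
# Sketch of the usual three-stage BERT-of-Theseus schedule (not part of the
# original snippet); `train_generator` and the epoch counts are placeholders,
# and bert_of_theseus() refers to the helper defined at the top of this page.
theseus_model = bert_of_theseus(predecessor, successor, classifier)
theseus_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(2e-5),
    metrics=['sparse_categorical_accuracy'],
)

# 1) fine-tune the 12-layer predecessor
predecessor_model.fit(train_generator.generator(),
                      steps_per_epoch=len(train_generator), epochs=5)
# 2) train the 3-layer successor by random module replacement
theseus_model.fit(train_generator.generator(),
                  steps_per_epoch=len(train_generator), epochs=10)
# 3) fine-tune the successor on its own
successor_model.fit(train_generator.generator(),
                    steps_per_epoch=len(train_generator), epochs=5)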
                yield [batch_token_ids, batch_segment_ids], batch_labels

                batch_token_ids, batch_segment_ids, batch_labels = [], [], []


train_generator = data_generator(data=train_data, batch_size=batch_size)
val_generator = data_generator(valid_data, batch_size)

# build model
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               num_hidden_layers=num_hidden_layers)
output = Lambda(lambda x: x[:, 0])(bert.output)
output = Dense(num_classes, activation='softmax')(output)

model = Model(bert.inputs, output)
model.summary()

model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(lr),
              metrics=['acc'])


def evaluate(data):
    total, right = 0., 0.
    for x_true, y_true in tqdm(data):
        y_pred = model.predict(x_true).argmax(axis=1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
Example #13
    def call(self, inputs):
        sim_loss = self.compute_loss_of_similarity(inputs)
        self.add_loss(sim_loss)
        self.add_metric(sim_loss, 'similarity loss')
        return super(DenseSimLoss, self).call(inputs)


# build model
model = build_transformer_model(
    config_path,
    checkpoint_path,
)
output = Lambda(lambda x: x[:, 0])(model.output)
output = DenseSimLoss(scale=1, units=num_classes, activation='softmax')(output)
model = Model(model.inputs, output)
model.summary()


def evaluate(data, model):
    total, right = 0., 0.
    for x_true, y_true in tqdm(data):
        y_pred = model.predict(x_true).argmax(axis=1)
        y_true = y_true[:, 0]
        total += len(y_true)
        right += (y_true == y_pred).sum()
    return right / total


class Evaluator(keras.callbacks.Callback):
    def __init__(self, savename):
        y_mask = y_mask[:, 1:]  # segment_ids happen to mark the positions to be predicted
        y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss


# build model
model = build_transformer_model(config_path,
                                checkpoint_path,
                                application='unilm',
                                keep_tokens=keep_tokens)

output = CrossEntropy(2)(model.inputs + model.outputs)

model = Model(model.inputs, output)
model.compile(optimizer=Adam(1e-5))
model.summary()


class QuestionAnswerGenerator(AutoRegressiveDecoder):
    """seq2seq解码器
    """
    @AutoRegressiveDecoder.wraps('probas')
    def predict(self, inputs, output_ids, states):
        token_ids, segment_ids = inputs
        token_ids = np.concatenate([token_ids, output_ids], 1)
        segment_ids = np.concatenate(
            [segment_ids, np.ones_like(output_ids)], 1)
        ret = model.predict([token_ids, segment_ids])[:, -1]
        return ret
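
    # Hypothetical sketch of the missing generation entry point (not part of the
    # original snippet); maxlen and the topk default are assumptions.
    def generate(self, text, topk=1):
        token_ids, segment_ids = tokenizer.encode(text, maxlen=256)
        output_ids = self.beam_search([token_ids, segment_ids], topk=topk)
        return tokenizer.decode(output_ids)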
Example #15
        loss = K.sum(loss * y_mask) / K.sum(y_mask)

        acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        acc = K.sum(acc * y_mask) / K.sum(y_mask)
        self.add_metric(acc, name='acc')
        return loss


model = build_transformer_model(config_path=config_path,
                                checkpoint_path=checkpoint_path,
                                with_mlm=True)

target_in = Input(shape=(None, ))
output = CrossEntropy(1)([target_in, model.output])

train_model = Model(model.inputs + [target_in], output)
train_model.compile(optimizer=Adam(1e-5))
train_model.summary()


def evaluate(data):
    label_ids = np.array([tokenizer.encode(l)[0][1:-1] for l in labels])
    #     print(label_ids)
    total, right = 0., 0.
    for x, _ in tqdm(data):
        x, y_true = x[:2], x[2]
        y_pred = model.predict(x)[:, mask_idx]
        y_pred = y_pred[:, 0, label_ids[:, 0]] * y_pred[:, 1, label_ids[:, 1]]
        y_pred = y_pred.argmax(axis=1)
        y_true = np.array(
            [labels.index(tokenizer.decode(y)) for y in y_true[:, mask_idx]])
Example #16
            if len(batch_tokens) >= self.batch_size or is_end:
                batch_tokens = pad_sequences(batch_tokens)
                batch_segs = pad_sequences(batch_segs)
                batch_labels = pad_sequences(batch_labels)
                yield [batch_tokens, batch_segs], batch_labels
                batch_tokens, batch_segs, batch_labels = [], [], []


model = build_transformer_model(config_path=bert_config,
                                checkpoint_path=bert_checkpoint)
output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
output = model.get_layer(output_layer).output
output = Dense(num_labes)(output)
CRF = ConditionalRandomField(lr_multi)
output = CRF(output)
model = Model(model.input, output)
model.summary()


class WordSeg(ViterbiDecoder):
    def segment(self, data):
        tokens = tokenizer.tokenize(data)
        while len(tokens) > 512:
            tokens.pop(-2)
        mapping = tokenizer.rematch(data, tokens)
        token_ids = tokenizer.tokens_to_ids(tokens)
        segs = [0] * len(token_ids)
        pre = model.predict([[token_ids], [segs]])[0]
        labels = self.decode(pre)

        words = []
Example #17
bert = build_transformer_model(config_path=config_path,
                               checkpoint_path=checkpoint_path,
                               num_hidden_layers=num_hidden_layers,
                               return_keras_model=False,
                               )
output = Lambda(lambda x: x[:, 0])(bert.output)

y_in = Input(shape=(None,))

# scale_output = Dense(256, kernel_initializer=bert.initializer)(output)
# logits = Dense(num_classes)(output)
scl_output = SupervisedContrastiveLearning(alpha=0.05, T=0.05, output_idx=0)([output, y_in])

clf_output = Dense(num_classes, activation='softmax')(output)
clf_ce = CrossEntropy(output_idx=0, alpha=0.95)([clf_output, y_in])
model = Model(bert.inputs, clf_output)
model.summary()

train_model = Model(bert.inputs + [y_in], [scl_output, clf_ce])
train_model.compile(optimizer=Adam(lr))


if __name__ == '__main__':
    evaluator = Evaluator()
    train_model.fit_generator(train_generator.generator(),
                              steps_per_epoch=len(train_generator),
                              epochs=epochs,
                              callbacks=[evaluator])

    # tsne
    from sklearn.manifold import TSNE
Example #18
    def call(self, inputs):
        return super(ScaleDense, self).call(inputs)


# Load the pretrained model (12 layers)
predecessor = build_transformer_model(config_path=config_path,
                                      checkpoint_path=checkpoint_path,
                                      return_keras_model=False,
                                      prefix='Predecessor-')

# Classifier model
x_in = Input(shape=K.int_shape(predecessor.output)[1:])
x = Lambda(lambda x: x[:, 0])(x_in)
x = Dense(units=num_classes, activation='softmax')(x)
classifier = Model(x_in, x)

predecessor_model = Model(predecessor.inputs, classifier(predecessor.output))
predecessor_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(1e-5),  # use a sufficiently small learning rate
    metrics=['sparse_categorical_accuracy'],
)
predecessor_model.summary()

# predecessor_model_3
output = predecessor_model.layers[31].output  # output of the 3rd Transformer layer
output = Lambda(lambda x: x[:, 0])(output)
dense = ScaleDense(lr_multiplier=5,
                   units=num_classes,
                   activation='softmax',
Example #19
def build_transformer_model_with_mlm(version='pre'):
    """带mlm的bert模型
    """
    assert version in ['pre', 'post', 'rezero']
    if version == 'rezero':
        attention_name = 'Transformer-%d-MultiHeadSelfAttention'
        feed_forward_name = 'Transformer-%d-FeedForward'
        skip_weights = []
        for i in range(12):
            skip_weights.append(feed_forward_name % i + '-Norm')
            skip_weights.append(feed_forward_name % i + '-ReWeight')
            skip_weights.append(attention_name % i + '-Norm')
            skip_weights.append(attention_name % i + '-ReWeight')

        bert = build_transformer_model(
            config_path,
            with_mlm='linear',
            model='rezero',
            return_keras_model=False,
            skip_weights_from_checkpoints=skip_weights,
            use_layernorm=None,
            reweight_trainable=True,
            init_reweight=0.,
        )
    else:
        bert = build_transformer_model(
            config_path,
            with_mlm='linear',
            model='rezero',
            return_keras_model=False,
            #         skip_weights_from_checkpoints=skip_weights,
            use_layernorm=version,
            reweight_trainable=False,
            init_reweight=1.,
        )

    proba = bert.model.output
    #     print(proba)
    # Auxiliary inputs
    token_ids = Input(shape=(None, ), dtype='int64', name='token_ids')  # target token ids
    is_masked = Input(shape=(None, ), dtype=K.floatx(),
                      name='is_masked')  # mask indicator

    #     nsp_label = Input(shape=(None, ), dtype='int64', name='nsp')  # nsp

    def mlm_loss(inputs):
        """计算loss的函数,需要封装为一个层
        """
        y_true, y_pred, mask = inputs
        #         _, y_pred = y_pred
        loss = K.sparse_categorical_crossentropy(y_true,
                                                 y_pred,
                                                 from_logits=True)
        loss = K.sum(loss * mask) / (K.sum(mask) + K.epsilon())
        return loss

    def nsp_loss(inputs):
        """计算nsp loss的函数,需要封装为一个层
        """
        y_true, y_pred = inputs
        #         y_pred, _ = y_pred
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.mean(loss)
        return loss

    def mlm_acc(inputs):
        """计算准确率的函数,需要封装为一个层
        """
        y_true, y_pred, mask = inputs
        #         _, y_pred = y_pred
        y_true = K.cast(y_true, K.floatx())
        acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        acc = K.sum(acc * mask) / (K.sum(mask) + K.epsilon())
        return acc

    def nsp_acc(inputs):
        """计算准确率的函数,需要封装为一个层
        """
        y_true, y_pred = inputs
        y_pred, _ = y_pred
        y_true = K.cast(y_true, K.floatx())
        acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
        acc = K.mean(acc)
        return acc

    mlm_loss = Lambda(mlm_loss, name='mlm_loss')([token_ids, proba, is_masked])
    mlm_acc = Lambda(mlm_acc, name='mlm_acc')([token_ids, proba, is_masked])
    #     nsp_loss = Lambda(nsp_loss, name='nsp_loss')([nsp_label, proba])
    #     nsp_acc = Lambda(nsp_acc, name='nsp_acc')([nsp_label, proba])

    train_model = Model(bert.model.inputs + [token_ids, is_masked],
                        [mlm_loss, mlm_acc])

    loss = {
        'mlm_loss': lambda y_true, y_pred: y_pred,
        'mlm_acc': lambda y_true, y_pred: K.stop_gradient(y_pred),
        #         'nsp_loss': lambda y_true, y_pred: y_pred,
        #         'nsp_acc': lambda y_true, y_pred: K.stop_gradient(y_pred),
    }

    return bert, train_model, loss
Example #20
output = DGCNN(dilation_rate=5, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=2, dropout_rate=0.1)(output)
output = DGCNN(dilation_rate=1, dropout_rate=0.1)(output)
output = SinCosPositionEmbedding(K.int_shape(output)[-1])(output)
att = AttentionPooling1D()(output)
output = ConcatSeq2Vec()([output, att])
# att = K.expand_dims(att, 1)
# output = Add()([output, att])

output = Dropout(0.3)(output)

output = Dense(2)(output)
output = MaskedSoftmax()(output)
output = Permute((2, 1), name='permute')(output)

model = Model(inputs, output)
model.summary()


def sparse_categorical_crossentropy(y_true, y_pred):
    # y_true's shape and dtype need to be made explicit again
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
    y_true = K.cast(y_true, 'int32')
    y_true = K.one_hot(y_true, K.shape(y_pred)[2])
    # compute the cross entropy
    return K.mean(K.categorical_crossentropy(y_true, y_pred))


def sparse_accuracy(y_true, y_pred):
    # y_true's shape and dtype need to be made explicit again
    y_true = K.reshape(y_true, K.shape(y_pred)[:-1])