Example #1
tpu_address = 'grpc://xxx.xxx.xxx.xxx:8470'  # set to None if running on multiple GPUs instead
which_optimizer = 'lamb'  # 'adam' or 'lamb'; both come with weight decay built in
lr_schedule = {
    num_warmup_steps * grad_accum_steps: 1.,
    num_train_steps * grad_accum_steps: 0.,
}
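
# Reading of this schedule (presumably consumed by a piecewise-linear
# scheduler such as the PiecewiseLinear class in Example #2): keys are
# global step counts and values are learning-rate multipliers, so the
# rate warms up linearly from 0 to 1 over the first
# num_warmup_steps * grad_accum_steps steps, then decays linearly to 0
# by the end of training.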

# Prepare variables
Input = keras.layers.Input
Lambda = keras.layers.Lambda
Model = keras.models.Model

# Load the dataset and build the data tensors
dataset = TrainingDataset.load_tfrecord(
    record_names=corpus_paths,
    sequence_length=sequence_length,
    batch_size=batch_size // grad_accum_steps,
)
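
# batch_size // grad_accum_steps is presumably the per-step micro-batch:
# gradients are accumulated over grad_accum_steps consecutive steps, so
# the effective batch size remains batch_size.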


def build_train_bert_model():
    """构建训练模型,通用于TPU/GPU
    注意全程要用keras标准的层写法,一些比较灵活的“移花接木”式的
    写法可能会在TPU上训练失败。此外,要注意的是TPU并非支持所有
    tensorflow算子,尤其不支持动态(变长)算子,因此编写相应运算
    时要格外留意。
    """
    bert = build_bert_model(config_path,
                            with_mlm='linear',
                            application='lm',
                            return_keras_model=False)
Example #2
exclude_from_weight_decay = ['Norm', 'bias']
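# The substrings above match layer-norm weights and biases, which are
# conventionally excluded from weight decay in BERT training.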


# Prepare some variables
Input = keras.layers.Input
Lambda = keras.layers.Lambda
Model = keras.models.Model
sparse_categorical_accuracy = keras.metrics.sparse_categorical_accuracy
ModelCheckpoint = keras.callbacks.ModelCheckpoint
CSVLogger = keras.callbacks.CSVLogger


# Load the dataset and build the data tensors
dataset = TrainingDataset.load_tfrecord(
    record_names=corpus_path,
    sequence_length=sequence_length,
    batch_size=batch_size,
)


# Build the optimizer

class PiecewiseLinear(keras.optimizers.schedules.LearningRateSchedule):
    """为tf.keras的OptimizerV2所写的分段线性学习率
    """
    def __init__(self, schedule, name=None):
        super(PiecewiseLinear, self).__init__()
        self.schedule = {int(i): j for i, j in schedule.items()}
        self.name = name
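
    # Example usage (assumed semantics, mirroring the lr_schedule dict in
    # Example #1): PiecewiseLinear({1000: 1., 10000: 0.}) would ramp
    # linearly from 0 to 1 over the first 1000 steps, then decay linearly
    # to 0 by step 10000.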

    def __call__(self, step):