def __init__(self, tokenizer: Tokenizer, maximum_position_encoding=1000,
             num_layers=6, d_model=512, num_heads=8, dff=2048,
             dropout_rate=0.1):
    self.tokenizer = tokenizer
    self.d_model = d_model
    vocab_size = tokenizer.get_num_tokens()

    # Warmup learning-rate schedule and the Adam hyperparameters used in
    # "Attention Is All You Need" (beta_1=0.9, beta_2=0.98, epsilon=1e-9).
    self.learning_rate = CustomSchedule(d_model)
    self.optimizer = tf.keras.optimizers.Adam(
        self.learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

    # Per-token loss (reduction='none') so padding positions can be masked
    # out before averaging in the training step.
    self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    self.train_loss = tf.keras.metrics.Mean(name='train_loss')
    self.train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')

    self.transformer = Transformer(num_layers, d_model, num_heads, dff,
                                   vocab_size, maximum_position_encoding,
                                   dropout_rate)

    # Checkpoint both the model and the optimizer state so training can
    # resume exactly where it left off; restore the most recent checkpoint
    # if one exists.
    self.checkpoint_path = './checkpoints/train'
    self.ckpt = tf.train.Checkpoint(transformer=self.transformer,
                                    optimizer=self.optimizer)
    self.ckpt_manager = tf.train.CheckpointManager(
        self.ckpt, self.checkpoint_path, max_to_keep=5)
    if self.ckpt_manager.latest_checkpoint:
        self.ckpt.restore(self.ckpt_manager.latest_checkpoint)
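
# `CustomSchedule` is referenced above but defined elsewhere in this repo.
# For reference, a minimal sketch of what it presumably implements: the warmup
# schedule from "Attention Is All You Need",
# lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5),
# which pairs with the Adam hyperparameters used above. The warmup_steps
# default of 4000 is an assumption, not taken from this section.
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        # Linear warmup for the first `warmup_steps` steps, then inverse
        # square-root decay, scaled by d_model^-0.5.
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)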