def create_optimizer(name, d_model, lamb=0.05, warmup_steps=4000):
    if name == "transformer_adam":
        learning_rate = TransformerSchedule(d_model, warmup_steps)
        optimizer = tf.keras.optimizers.Adam(learning_rate,
                                             beta_1=0.9,
                                             beta_2=0.98,
                                             epsilon=1e-9)
    elif name == "transformer_sgd":
        learning_rate = TransformerSchedule(d_model, warmup_steps)
        optimizer = tf.keras.optimizers.SGD(learning_rate,
                                            momentum=0.99,
                                            nesterov=True)
    elif name == "san":
        learning_rate = SANSchedule(lamb, d_model, warmup_steps)
        optimizer = tf.keras.optimizers.SGD(learning_rate,
                                            momentum=0.99,
                                            nesterov=True)
    else:
        raise ValueError(
            "optimizer name must be one of 'transformer_adam', 'transformer_sgd' or 'san'")
    return optimizer
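# For reference, the helper above could be exercised as below. The concrete
# d_model / warmup_steps / lamb values are purely illustrative (they mirror the
# schedule-plotting snippet further down), not values required by the helper.
adam_opt = create_optimizer("transformer_adam", d_model=144, warmup_steps=10000)
san_opt = create_optimizer("san", d_model=512, lamb=0.05, warmup_steps=4000)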
with strategy.scope():
    # build model
    conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
    conformer.make(speech_featurizer.shape,
                   prediction_shape=text_featurizer.prepand_shape,
                   batch_size=global_batch_size)
    if args.pretrained:
        conformer.load_weights(args.pretrained, by_name=True, skip_mismatch=True)
    conformer.summary(line_length=100)

    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=conformer.dmodel,
            warmup_steps=config.learning_config.optimizer_config.pop("warmup_steps", 10000),
            max_lr=(0.05 / math.sqrt(conformer.dmodel))),
        **config.learning_config.optimizer_config)

    conformer.compile(optimizer=optimizer,
                      experimental_steps_per_execution=args.spx,
                      global_batch_size=global_batch_size,
                      blank=text_featurizer.blank)

callbacks = [
    tf.keras.callbacks.ModelCheckpoint(**config.learning_config.running_config.checkpoint),
    tf.keras.callbacks.experimental.BackupAndRestore(config.learning_config.running_config.states_dir),
    tf.keras.callbacks.TensorBoard(**config.learning_config.running_config.tensorboard),
]
with strategy.scope():
    # build model
    contextnet = ContextNet(**config.model_config, vocabulary_size=text_featurizer.num_classes)
    contextnet._build(speech_featurizer.shape,
                      prediction_shape=text_featurizer.prepand_shape,
                      batch_size=global_batch_size)
    contextnet.summary(line_length=120)
    if args.saved:
        contextnet.load_weights(args.saved, by_name=True, skip_mismatch=True)

    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=contextnet.dmodel,
            warmup_steps=config.learning_config.optimizer_config["warmup_steps"],
            max_lr=(0.05 / math.sqrt(contextnet.dmodel))),
        beta_1=config.learning_config.optimizer_config["beta1"],
        beta_2=config.learning_config.optimizer_config["beta2"],
        epsilon=config.learning_config.optimizer_config["epsilon"])

    contextnet.compile(optimizer=optimizer,
                       experimental_steps_per_execution=args.spx,
                       global_batch_size=global_batch_size,
                       blank=text_featurizer.blank)

callbacks = [
    tf.keras.callbacks.ModelCheckpoint(**config.learning_config.running_config.checkpoint),
    tf.keras.callbacks.experimental.BackupAndRestore(config.learning_config.running_config.states_dir),
]
conformer_trainer = TransducerTrainer(
    config=config.learning_config.running_config,
    text_featurizer=text_featurizer, strategy=strategy)

with conformer_trainer.strategy.scope():
    # build model
    conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
    conformer._build(speech_featurizer.shape)
    conformer.summary(line_length=120)

    optimizer_config = config.learning_config.optimizer_config
    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=conformer.dmodel,
            warmup_steps=optimizer_config["warmup_steps"],
            max_lr=(0.05 / math.sqrt(conformer.dmodel))),
        beta_1=optimizer_config["beta1"],
        beta_2=optimizer_config["beta2"],
        epsilon=optimizer_config["epsilon"])

conformer_trainer.compile(model=conformer, optimizer=optimizer, max_to_keep=args.max_ckpts)

conformer_trainer.fit(train_dataset, eval_dataset, train_bs=args.tbs, eval_bs=args.ebs)
def main():
    parser = argparse.ArgumentParser(prog="Conformer Training")

    parser.add_argument("--config", type=str, default=DEFAULT_YAML,
                        help="The file path of model configuration file")

    parser.add_argument("--max_ckpts", type=int, default=10,
                        help="Max number of checkpoints to keep")

    parser.add_argument("--tbs", type=int, default=None,
                        help="Train batch size per replica")

    parser.add_argument("--ebs", type=int, default=None,
                        help="Evaluation batch size per replica")

    parser.add_argument("--acs", type=int, default=None,
                        help="Train accumulation steps")

    parser.add_argument("--devices", type=int, nargs="*", default=[0],
                        help="Devices' ids to apply distributed training")

    parser.add_argument("--mxp", default=False, action="store_true",
                        help="Enable mixed precision")

    parser.add_argument("--subwords", type=str, default=None,
                        help="Path to file that stores generated subwords")

    parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[],
                        help="Transcript files for generating subwords")

    parser.add_argument("--train-dir", '-td', nargs='*',
                        default=["en_ng_male_train.tsv", "en_ng_female_train.tsv"])
    parser.add_argument("--train-reg-dir", '-trd', nargs='*',
                        default=[
                            "libritts_train-clean-100.tsv",
                            "libritts_train-clean-360.tsv",
                            "libritts_train-other-500.tsv"
                        ])
    parser.add_argument("--dev-dir", '-dd', nargs='*',
                        default=["en_ng_male_eval.tsv", "en_ng_female_eval.tsv"])
    parser.add_argument("--dev-reg-dir", '-drd', nargs='*',
                        default=["libritts_test-other.tsv"])

    args = parser.parse_args()

    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": args.mxp})

    strategy = setup_strategy(args.devices)

    config = Config(args.config, learning=True)
    config.train_dir = args.train_dir
    config.dev_dir = args.dev_dir
    config.train_reg_dir = args.train_reg_dir
    config.dev_reg_dir = args.dev_reg_dir

    with open(config.speech_config) as f:
        speech_config = yaml.load(f, Loader=yaml.Loader)
    speech_featurizer = TFSpeechFeaturizer(speech_config)

    if args.subwords and os.path.exists(args.subwords):
        print("Loading subwords ...")
        text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords)
    else:
        print("Generating subwords ...")
        text_featurizer = SubwordFeaturizer.build_from_corpus(
            config.decoder_config, corpus_files=args.subwords_corpus)
        text_featurizer.save_to_file(args.subwords)

    train_dataset = Dataset(data_paths=config.train_dir,
                            speech_featurizer=speech_featurizer,
                            text_featurizer=text_featurizer,
                            augmentations=config.learning_config.augmentations,
                            stage="train",
                            cache=False,
                            shuffle=False)
    train_reg_dataset = DatasetInf(data_paths=config.train_reg_dir,
                                   speech_featurizer=speech_featurizer,
                                   text_featurizer=text_featurizer,
                                   augmentations=config.learning_config.augmentations,
                                   stage="train",
                                   cache=False,
                                   shuffle=False)
    eval_dataset = Dataset(data_paths=config.dev_dir,
                           speech_featurizer=speech_featurizer,
                           text_featurizer=text_featurizer,
                           stage="eval",
                           cache=False,
                           shuffle=False)
    eval_reg_dataset = DatasetInf(data_paths=config.dev_reg_dir,
                                  speech_featurizer=speech_featurizer,
                                  text_featurizer=text_featurizer,
                                  augmentations=config.learning_config.augmentations,
                                  stage="eval",
                                  cache=False,
                                  shuffle=False)

    conformer_trainer = MultiReaderTransducerTrainer(
        config=config.learning_config.running_config,
        text_featurizer=text_featurizer, strategy=strategy)

    with conformer_trainer.strategy.scope():
        # build model
        conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
        conformer._build(speech_featurizer.shape)
        conformer.summary(line_length=120)

        optimizer = tf.keras.optimizers.Adam(
            TransformerSchedule(
                d_model=conformer.dmodel,
                warmup_steps=config.learning_config.optimizer_config["warmup_steps"],
                max_lr=(0.05 / math.sqrt(conformer.dmodel))),
            beta_1=config.learning_config.optimizer_config["beta1"],
            beta_2=config.learning_config.optimizer_config["beta2"],
            epsilon=config.learning_config.optimizer_config["epsilon"])

    conformer_trainer.compile(model=conformer, optimizer=optimizer, max_to_keep=args.max_ckpts)

    conformer_trainer.fit(
        train_dataset,
        train_reg_dataset,
        # alpha for regularising dataset; alpha = 1 for training dataset
        1.,
        eval_dataset,
        eval_reg_dataset,
        train_bs=args.tbs,
        eval_bs=args.ebs,
        train_acs=args.acs)
conformer_trainer = TransducerTrainerGA(
    config=config["learning_config"]["running_config"],
    text_featurizer=text_featurizer, strategy=strategy
)

with conformer_trainer.strategy.scope():
    # build model
    conformer = Conformer(
        **config["model_config"],
        vocabulary_size=text_featurizer.num_classes
    )
    conformer._build(speech_featurizer.shape)
    conformer.summary(line_length=120)

    optimizer_config = config["learning_config"]["optimizer_config"]
    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=config["model_config"]["dmodel"],
            warmup_steps=optimizer_config["warmup_steps"],
            max_lr=(0.05 / math.sqrt(config["model_config"]["dmodel"]))
        ),
        beta_1=optimizer_config["beta1"],
        beta_2=optimizer_config["beta2"],
        epsilon=optimizer_config["epsilon"]
    )

conformer_trainer.compile(model=conformer, optimizer=optimizer, max_to_keep=args.max_ckpts)

conformer_trainer.fit(train_dataset, eval_dataset, train_bs=args.tbs, eval_bs=args.ebs)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow_asr.optimizers.schedules import SANSchedule, TransformerSchedule

lr = SANSchedule(lamb=0.05, d_model=512, warmup_steps=4000)

plt.plot(lr(tf.range(40000, dtype=tf.float32)))
plt.ylabel("Learning Rate")
plt.xlabel("Train Step")
plt.show()

lr = TransformerSchedule(d_model=144, warmup_steps=10000)

plt.plot(lr(tf.range(2000000, dtype=tf.float32)))
plt.ylabel("Learning Rate")
plt.xlabel("Train Step")
plt.show()
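# The library's TransformerSchedule implementation is not reproduced in this file.
# As a rough sketch only, a Transformer-style (Noam) warm-up schedule with the
# max_lr cap used by the training scripts above could look like the class below.
# The class name NoamSketchSchedule and the exact capping behaviour are assumptions,
# not the library's code; the core formula is the standard
# lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5).
import tensorflow as tf


class NoamSketchSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Sketch of a Transformer-style (Noam) schedule with an optional peak cap."""

    def __init__(self, d_model, warmup_steps=4000, max_lr=None):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = tf.cast(warmup_steps, tf.float32)
        self.max_lr = max_lr

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        # Linear warm-up to the peak, then inverse-square-root decay.
        lr = tf.math.rsqrt(self.d_model) * tf.minimum(
            tf.math.rsqrt(step), step * (self.warmup_steps ** -1.5))
        if self.max_lr is not None:
            # Cap the peak learning rate, mirroring the max_lr argument seen above.
            lr = tf.minimum(lr, tf.cast(self.max_lr, tf.float32))
        return lr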
conformer_trainer = TransducerTrainerGA(
    config=config.learning_config.running_config,
    text_featurizer=text_featurizer, strategy=strategy)

with conformer_trainer.strategy.scope():
    # build model
    conformer = Conformer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
    conformer._build(speech_featurizer.shape)
    conformer.summary(line_length=120)

    optimizer = tf.keras.optimizers.Adam(
        TransformerSchedule(
            d_model=config.model_config["encoder_dmodel"],
            warmup_steps=config.learning_config.optimizer_config["warmup_steps"],
            max_lr=(0.05 / math.sqrt(config.model_config["encoder_dmodel"]))),
        beta_1=config.learning_config.optimizer_config["beta1"],
        beta_2=config.learning_config.optimizer_config["beta2"],
        epsilon=config.learning_config.optimizer_config["epsilon"])

conformer_trainer.compile(model=conformer, optimizer=optimizer, max_to_keep=args.max_ckpts)

conformer_trainer.fit(train_dataset, eval_dataset,
                      train_bs=args.tbs, eval_bs=args.ebs, train_acs=args.acs)
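# Note on train_acs: it sets the number of gradient-accumulation steps for
# TransducerTrainerGA. Assuming the trainer accumulates gradients over that many
# micro-batches before each optimizer update (an assumption about its internals,
# not verified here), the effective batch size per update would be:
train_bs_per_replica = 4   # hypothetical value of args.tbs
num_replicas = 2           # hypothetical number of devices in the strategy
train_acs = 8              # hypothetical value of args.acs
effective_batch_per_update = train_bs_per_replica * num_replicas * train_acs  # 64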
def main(
    config: str = DEFAULT_YAML,
    tfrecords: bool = False,
    sentence_piece: bool = False,
    subwords: bool = True,
    bs: int = None,
    spx: int = 1,
    metadata: str = None,
    static_length: bool = False,
    devices: list = [0],
    mxp: bool = False,
    pretrained: str = None,
):
    tf.keras.backend.clear_session()
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": mxp})
    strategy = env_util.setup_strategy(devices)
    config = Config(config)

    speech_featurizer, text_featurizer = featurizer_helpers.prepare_featurizers(
        config=config,
        subwords=subwords,
        sentence_piece=sentence_piece,
    )

    train_dataset, eval_dataset = dataset_helpers.prepare_training_datasets(
        config=config,
        speech_featurizer=speech_featurizer,
        text_featurizer=text_featurizer,
        tfrecords=tfrecords,
        metadata=metadata,
    )

    if not static_length:
        speech_featurizer.reset_length()
        text_featurizer.reset_length()

    train_data_loader, eval_data_loader, global_batch_size = dataset_helpers.prepare_training_data_loaders(
        config=config,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        strategy=strategy,
        batch_size=bs,
    )

    with strategy.scope():
        contextnet = ContextNet(**config.model_config, vocabulary_size=text_featurizer.num_classes)
        contextnet.make(speech_featurizer.shape,
                        prediction_shape=text_featurizer.prepand_shape,
                        batch_size=global_batch_size)
        if pretrained:
            contextnet.load_weights(pretrained, by_name=True, skip_mismatch=True)
        contextnet.summary(line_length=100)

        optimizer = tf.keras.optimizers.Adam(
            TransformerSchedule(
                d_model=contextnet.dmodel,
                warmup_steps=config.learning_config.optimizer_config.pop("warmup_steps", 10000),
                max_lr=(0.05 / math.sqrt(contextnet.dmodel)),
            ),
            **config.learning_config.optimizer_config
        )

        contextnet.compile(
            optimizer=optimizer,
            experimental_steps_per_execution=spx,
            global_batch_size=global_batch_size,
            blank=text_featurizer.blank,
        )

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(**config.learning_config.running_config.checkpoint),
        tf.keras.callbacks.experimental.BackupAndRestore(config.learning_config.running_config.states_dir),
        tf.keras.callbacks.TensorBoard(**config.learning_config.running_config.tensorboard),
    ]

    contextnet.fit(
        train_data_loader,
        epochs=config.learning_config.running_config.num_epochs,
        validation_data=eval_data_loader,
        callbacks=callbacks,
        steps_per_epoch=train_dataset.total_steps,
        validation_steps=eval_dataset.total_steps if eval_data_loader else None,
    )