def main():
    args, name = parse_args()

    log_dir = name
    if args.work_dir:
        log_dir = os.path.join(args.work_dir, name)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=log_dir,
        checkpoint_dir=args.checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=args.tensorboard_dir,
    )

    if args.local_rank is not None:
        logging.info('Doing ALL GPU')

    yaml = YAML(typ="safe")
    with open(args.model_config) as file:
        tacotron2_params = yaml.load(file)
    labels = tacotron2_params["labels"]

    # instantiate neural modules
    neural_modules = create_NMs(args.model_config, labels)

    # build dags
    train_loss, callbacks, steps_per_epoch = create_all_dags(
        neural_factory=neural_factory,
        neural_modules=neural_modules,
        tacotron2_config_file=args.model_config,
        train_dataset=args.train_dataset,
        batch_size=args.batch_size,
        eval_freq=args.eval_freq,
        checkpoint_save_freq=args.checkpoint_save_freq,
        eval_datasets=args.eval_datasets,
        eval_batch_size=args.eval_batch_size,
        labels=labels,
    )

    # train model
    total_steps = (args.max_steps if args.max_steps is not None
                   else args.num_epochs * steps_per_epoch)
    neural_factory.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=CosineAnnealing(total_steps, min_lr=args.min_lr),
        optimizer=args.optimizer,
        optimization_params={
            "num_epochs": args.num_epochs,
            "max_steps": args.max_steps,
            "lr": args.lr,
            "weight_decay": args.weight_decay,
            "grad_norm_clip": args.grad_norm_clip,
        },
        batches_per_step=args.iter_per_step,
    )
def main():
    args = parse_args()
    name = construct_name(
        args.exp_name,
        args.lr,
        args.batch_size,
        args.max_steps,
        args.num_epochs,
        args.weight_decay,
        args.optimizer,
        args.iter_per_step,
    )
    log_dir = name
    if args.work_dir:
        log_dir = os.path.join(args.work_dir, name)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=log_dir,
        checkpoint_dir=args.checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=args.tensorboard_dir,
    )
    args.num_gpus = neural_factory.world_size
    checkpoint_dir = neural_factory.checkpoint_dir
    if args.local_rank is not None:
        logging.info('Doing ALL GPU')

    # build dags
    train_loss, callbacks, steps_per_epoch = create_all_dags(
        args, neural_factory)

    # train model
    neural_factory.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=CosineAnnealing(
            args.max_steps if args.max_steps is not None
            else args.num_epochs * steps_per_epoch,
            warmup_steps=args.warmup_steps,
        ),
        optimizer=args.optimizer,
        optimization_params={
            "num_epochs": args.num_epochs,
            "max_steps": args.max_steps,
            "lr": args.lr,
            "betas": (args.beta1, args.beta2),
            "weight_decay": args.weight_decay,
            "grad_norm_clip": None,
        },
        batches_per_step=args.iter_per_step,
    )
def main():
    args = parse_args()
    print(args)
    emb_size = 1024
    name = construct_name(
        args.exp_name,
        args.lr,
        args.batch_size,
        args.num_epochs,
        args.weight_decay,
        args.optimizer,
        emb_size=emb_size,
    )
    work_dir = name
    if args.work_dir:
        work_dir = os.path.join(args.work_dir, name)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=work_dir,
        checkpoint_dir=args.checkpoint_dir + "/" + args.exp_name,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        random_seed=42,
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=args.tensorboard_dir + "/" + name,
    )
    args.num_gpus = neural_factory.world_size
    args.checkpoint_dir = neural_factory.checkpoint_dir
    if args.local_rank is not None:
        logging.info("Doing ALL GPU")

    # build dags
    (train_loss, callbacks, steps_per_epoch,
     loss_test, logits_test, label_test) = create_all_dags(args, neural_factory)

    # train model
    neural_factory.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=CosineAnnealing(
            args.num_epochs * steps_per_epoch,
            warmup_steps=0.1 * args.num_epochs * steps_per_epoch,
        ),
        optimizer=args.optimizer,
        optimization_params={
            "num_epochs": args.num_epochs,
            "lr": args.lr,
            "betas": (args.beta1, args.beta2),
            "weight_decay": args.weight_decay,
            "grad_norm_clip": None,
        },
        batches_per_step=args.iter_per_step,
        synced_batchnorm=args.synced_bn,
        synced_batchnorm_groupsize=args.synced_bn_groupsize,
    )
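# Several of the mains above build their run name via a `construct_name`
# helper defined elsewhere in these scripts (the variants take slightly
# different argument sets). A minimal sketch of what such a helper might look
# like (hypothetical; the real one may order or format fields differently):
def construct_name(exp_name, lr, batch_size, num_epochs, weight_decay,
                   optimizer, iter_per_step=None, emb_size=None):
    """Encode the key hyperparameters into a run/log-directory name."""
    name = (f"{exp_name}-lr_{lr}-bs_{batch_size}-e_{num_epochs}"
            f"-wd_{weight_decay}-opt_{optimizer}")
    if iter_per_step is not None:
        name += f"-ips_{iter_per_step}"
    if emb_size is not None:
        name += f"-embsize_{emb_size}"
    return name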
    callback_eval = nemo.core.EvaluatorCallback(
        eval_tensors=[tgt_, eval_loss, beam_trans, sent_ids_],
        user_iter_callback=lambda x, y: eval_iter_callback(x, y, tokenizer),
        user_epochs_done_callback=lambda x: eval_epochs_done_callback(
            x, validation_dataset=valid_dataset),
        eval_step=args.eval_freq,
        tb_writer=tb_writer)

    # callback which saves checkpoints once in a while
    callback_ckpt = nemo.core.CheckpointCallback(
        folder=args.checkpoint_dir,
        step_freq=args.checkpoint_save_freq,
        checkpoints_to_keep=1)

    # define learning rate decay policy
    lr_policy = CosineAnnealing(args.max_steps, warmup_steps=args.warmup_steps)

    # define and launch training algorithm (optimizer)
    max_num_epochs = 0 if args.interactive else args.num_epochs

    optimizer = neural_factory.get_trainer()

    callbacks = [callback_ckpt]
    if not args.interactive:
        callbacks.extend([callback_train, callback_eval])
    optimizer.train(tensors_to_optimize=[train_loss],
                    callbacks=callbacks,
                    optimizer=args.optimizer,
                    lr_policy=lr_policy,
                    optimization_params={
                        "num_epochs": max_num_epochs,
def main():
    parser = argparse.ArgumentParser(parents=[nm_argparse.NemoArgParser()],
                                     description='AN4 ASR',
                                     conflict_handler='resolve')

    # Overwrite default args
    parser.add_argument("--train_dataset", type=str,
                        help="training dataset path")
    parser.add_argument("--eval_datasets", type=str, nargs=1,
                        help="validation dataset path")

    # Create new args
    parser.add_argument("--lm", default="./an4-lm.3gram.binary", type=str)
    parser.add_argument("--test_after_training", action='store_true')
    parser.add_argument("--momentum", type=float)
    parser.add_argument("--beta1", default=0.95, type=float)
    parser.add_argument("--beta2", default=0.25, type=float)
    parser.set_defaults(
        model_config="./configs/jasper_an4.yaml",
        train_dataset="/home/mrjenkins/TestData/an4_dataset/an4_train.json",
        eval_datasets="/home/mrjenkins/TestData/an4_dataset/an4_val.json",
        work_dir="./tmp",
        checkpoint_dir="./tmp",
        optimizer="novograd",
        num_epochs=50,
        batch_size=32,
        eval_batch_size=16,
        lr=0.02,
        weight_decay=0.005,
        checkpoint_save_freq=1000,
        eval_freq=100,
        amp_opt_level="O1")

    args = parser.parse_args()
    betas = (args.beta1, args.beta2)

    wer_thr = 0.20
    beam_wer_thr = 0.15

    nf = nemo.core.NeuralModuleFactory(local_rank=args.local_rank,
                                       optimization_level=args.amp_opt_level,
                                       random_seed=0,
                                       log_dir=args.work_dir,
                                       checkpoint_dir=args.checkpoint_dir,
                                       create_tb_writer=True,
                                       cudnn_benchmark=args.cudnn_benchmark)
    tb_writer = nf.tb_writer
    checkpoint_dir = nf.checkpoint_dir
    args.checkpoint_dir = nf.checkpoint_dir

    # Load model definition
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    vocab = jasper_params['labels']
    sample_rate = jasper_params['sample_rate']

    # build train and eval model
    train_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    train_dl_params.update(jasper_params["AudioToTextDataLayer"]["train"])
    del train_dl_params["train"]
    del train_dl_params["eval"]

    data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.train_dataset,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=args.batch_size,
        **train_dl_params)

    num_samples = len(data_layer)
    total_steps = int(num_samples * args.num_epochs / args.batch_size)
    print("Train samples=", num_samples, "num_steps=", total_steps)

    data_preprocessor = nemo_asr.AudioPreprocessing(
        sample_rate=sample_rate,
        **jasper_params["AudioPreprocessing"])

    # data_augmentation = nemo_asr.SpectrogramAugmentation(
    #     **jasper_params['SpectrogramAugmentation']
    # )

    eval_dl_params = copy.deepcopy(jasper_params["AudioToTextDataLayer"])
    eval_dl_params.update(jasper_params["AudioToTextDataLayer"]["eval"])
    del eval_dl_params["train"]
    del eval_dl_params["eval"]

    data_layer_eval = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.eval_datasets,
        sample_rate=sample_rate,
        labels=vocab,
        batch_size=args.eval_batch_size,
        **eval_dl_params)

    num_samples = len(data_layer_eval)
    nf.logger.info(f"Eval samples={num_samples}")

    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=jasper_params["AudioPreprocessing"]["features"],
        **jasper_params["JasperEncoder"])

    jasper_decoder = nemo_asr.JasperDecoderForCTC(
        feat_in=jasper_params["JasperEncoder"]["jasper"][-1]["filters"],
        num_classes=len(vocab))

    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(vocab))

    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    # Training model
    audio, audio_len, transcript, transcript_len = data_layer()
    processed, processed_len = data_preprocessor(input_signal=audio,
                                                 length=audio_len)
    encoded, encoded_len = jasper_encoder(audio_signal=processed,
                                          length=processed_len)
    log_probs = jasper_decoder(encoder_output=encoded)
    predictions = greedy_decoder(log_probs=log_probs)
    loss = ctc_loss(log_probs=log_probs,
                    targets=transcript,
                    input_length=encoded_len,
                    target_length=transcript_len)

    # Evaluation model
    audio_e, audio_len_e, transcript_e, transcript_len_e = data_layer_eval()
    processed_e, processed_len_e = data_preprocessor(input_signal=audio_e,
                                                     length=audio_len_e)
    encoded_e, encoded_len_e = jasper_encoder(audio_signal=processed_e,
                                              length=processed_len_e)
    log_probs_e = jasper_decoder(encoder_output=encoded_e)
    predictions_e = greedy_decoder(log_probs=log_probs_e)
    loss_e = ctc_loss(log_probs=log_probs_e,
                      targets=transcript_e,
                      input_length=encoded_len_e,
                      target_length=transcript_len_e)
    nf.logger.info("Num of params in encoder: {0}".format(
        jasper_encoder.num_weights))

    # Callbacks to print info to console and Tensorboard
    train_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[loss, predictions, transcript, transcript_len],
        print_func=lambda x: monitor_asr_train_progress(x, labels=vocab),
        get_tb_values=lambda x: [["loss", x[0]]],
        tb_writer=tb_writer,
    )

    checkpointer_callback = nemo.core.CheckpointCallback(
        folder=checkpoint_dir, step_freq=args.checkpoint_save_freq)

    eval_tensors = [loss_e, predictions_e, transcript_e, transcript_len_e]
    eval_callback = nemo.core.EvaluatorCallback(
        eval_tensors=eval_tensors,
        user_iter_callback=lambda x, y: process_evaluation_batch(
            x, y, labels=vocab),
        user_epochs_done_callback=process_evaluation_epoch,
        eval_step=args.eval_freq,
        tb_writer=tb_writer)

    nf.train(tensors_to_optimize=[loss],
             callbacks=[train_callback, eval_callback, checkpointer_callback],
             optimizer=args.optimizer,
             lr_policy=CosineAnnealing(total_steps=total_steps),
             optimization_params={
                 "num_epochs": args.num_epochs,
                 "max_steps": args.max_steps,
                 "lr": args.lr,
                 "momentum": args.momentum,
                 "betas": betas,
                 "weight_decay": args.weight_decay,
                 "grad_norm_clip": None
             },
             batches_per_step=args.iter_per_step)

    if args.test_after_training:
        # Create BeamSearch NM
        beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
            vocab=vocab,
            beam_width=64,
            alpha=2.,
            beta=1.5,
            lm_path=args.lm,
            num_cpus=max(os.cpu_count(), 1))
        beam_predictions = beam_search_with_lm(log_probs=log_probs_e,
                                               log_probs_length=encoded_len_e)
        eval_tensors.append(beam_predictions)

        evaluated_tensors = nf.infer(eval_tensors)
        greedy_hypotheses = post_process_predictions(evaluated_tensors[1],
                                                     vocab)
        references = post_process_transcripts(evaluated_tensors[2],
                                              evaluated_tensors[3], vocab)
        wer = word_error_rate(hypotheses=greedy_hypotheses,
                              references=references)
        nf.logger.info("Greedy WER: {:.2f}%".format(wer * 100))
        assert wer <= wer_thr, (
            "Final eval greedy WER {:.2f}% exceeds {:.2f}%".format(
                wer * 100, wer_thr * 100))

        beam_hypotheses = []
        # Over mini-batch
        for i in evaluated_tensors[-1]:
            # Over samples; each sample is a list of (score, transcript)
            # pairs, so take the top beam's transcript
            for j in i:
                beam_hypotheses.append(j[0][1])

        beam_wer = word_error_rate(hypotheses=beam_hypotheses,
                                   references=references)
        nf.logger.info("Beam WER {:.2f}%".format(beam_wer * 100))
        assert beam_wer <= beam_wer_thr, (
            "Final eval beam WER {:.2f}% exceeds {:.2f}%".format(
                beam_wer * 100, beam_wer_thr * 100))
        assert beam_wer <= wer, "Final eval beam WER exceeds the greedy WER."

        # Reload model weights and train for extra 10 epochs
        checkpointer_callback = nemo.core.CheckpointCallback(
            folder=checkpoint_dir,
            step_freq=args.checkpoint_save_freq,
            force_load=True)

        nf.reset_trainer()
        nf.train(tensors_to_optimize=[loss],
                 callbacks=[train_callback, checkpointer_callback],
                 optimizer=args.optimizer,
                 optimization_params={
                     "num_epochs": args.num_epochs + 10,
                     "lr": args.lr,
                     "momentum": args.momentum,
                     "betas": betas,
                     "weight_decay": args.weight_decay,
                     "grad_norm_clip": None
                 },
                 reset=True)

        evaluated_tensors = nf.infer(eval_tensors[:-1])
        greedy_hypotheses = post_process_predictions(evaluated_tensors[1],
                                                     vocab)
        references = post_process_transcripts(evaluated_tensors[2],
                                              evaluated_tensors[3], vocab)
        wer_new = word_error_rate(hypotheses=greedy_hypotheses,
                                  references=references)
        nf.logger.info("New greedy WER: {:.2f}%".format(wer_new * 100))
        assert wer_new <= wer * 1.1, (
            f"Fine tuning: new WER {wer_new * 100:.2f}% exceeds the previous "
            f"WER {wer * 100:.2f}%")
def main():
    args = parse_args()
    name = construct_name(
        args.exp_name,
        args.lr,
        args.batch_size,
        args.max_steps,
        args.num_epochs,
        args.weight_decay,
        args.optimizer,
        args.iter_per_step,
    )

    # time stamp
    date_time = datetime.now().strftime("%m-%d-%Y -- %H-%M-%S")

    log_dir = name
    if args.work_dir:
        log_dir = os.path.join(args.work_dir, name)

    if args.tensorboard_dir is None:
        tensorboard_dir = os.path.join(name, 'tensorboard', date_time)
    else:
        tensorboard_dir = args.tensorboard_dir

    if args.checkpoint_dir is None:
        checkpoint_dir = os.path.join(name, date_time)
    else:
        base_checkpoint_dir = args.checkpoint_dir
        if len(glob.glob(os.path.join(base_checkpoint_dir, '*.pt'))) > 0:
            checkpoint_dir = base_checkpoint_dir
        else:
            checkpoint_dir = os.path.join(args.checkpoint_dir, date_time)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=log_dir,
        checkpoint_dir=checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=tensorboard_dir,
    )
    args.num_gpus = neural_factory.world_size

    if args.local_rank is not None:
        logging.info('Doing ALL GPU')

    # build dags
    train_loss, callbacks, steps_per_epoch = create_all_dags(
        args, neural_factory)

    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    lr_schedule = jasper_params.get('lr_schedule', 'CosineAnnealing')
    total_steps = (args.max_steps if args.max_steps is not None
                   else args.num_epochs * steps_per_epoch)

    if lr_schedule == 'CosineAnnealing':
        lr_policy = CosineAnnealing(
            total_steps=total_steps,
            warmup_ratio=args.warmup_ratio,
            min_lr=args.min_lr,
        )
    elif lr_schedule == 'PolynomialDecayAnnealing':
        lr_policy = PolynomialDecayAnnealing(
            total_steps=total_steps,
            warmup_ratio=args.warmup_ratio,
            min_lr=args.min_lr,
            power=2.0,
        )
    elif lr_schedule == 'PolynomialHoldDecayAnnealing':
        lr_policy = PolynomialHoldDecayAnnealing(
            total_steps=total_steps,
            warmup_ratio=args.warmup_ratio,
            hold_ratio=args.hold_ratio,
            min_lr=args.min_lr,
            power=2.0,
        )
    else:
        raise ValueError(f"Invalid LR schedule: {lr_schedule}")

    logging.info(f"Using `{lr_policy}` Learning Rate Scheduler")

    # train model
    neural_factory.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=lr_policy,
        optimizer=args.optimizer,
        optimization_params={
            "num_epochs": args.num_epochs,
            "max_steps": args.max_steps,
            "lr": args.lr,
            "momentum": 0.95,
            "betas": (args.beta1, args.beta2),
            "weight_decay": args.weight_decay,
            "grad_norm_clip": None,
        },
        batches_per_step=args.iter_per_step,
    )
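# All three schedules above follow the same skeleton: a linear warmup over
# `warmup_ratio * total_steps` steps, then a decay from `lr` toward `min_lr`.
# A minimal sketch of the cosine variant as a plain function of the step count
# (illustrative; NeMo's policy classes may differ in details):
import math

def cosine_lr(step, total_steps, lr, min_lr=0.0, warmup_ratio=0.0):
    warmup_steps = int(warmup_ratio * total_steps)
    if warmup_steps and step < warmup_steps:
        return lr * step / warmup_steps  # linear warmup
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return min_lr + 0.5 * (lr - min_lr) * (1 + math.cos(math.pi * progress))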
                           (args.batch_size * args.num_gpus * args.batch_per_step))

    callback_dev = nemo.core.EvaluatorCallback(
        # eval_tensors=[dev_mlm_loss, dev_nsp_loss],
        eval_tensors=[dev_mlm_loss],
        user_iter_callback=eval_iter_callback,
        user_epochs_done_callback=eval_epochs_done_callback,
        eval_step=steps_per_epoch,
        tb_writer=tb_writer)

    # define learning rate decay policy
    if args.lr_decay_policy == "poly":
        lr_policy = SquareAnnealing(args.num_epochs * steps_per_epoch,
                                    warmup_ratio=args.lr_warmup_proportion)
    elif args.lr_decay_policy == "cosine":
        lr_policy = CosineAnnealing(args.num_epochs * steps_per_epoch,
                                    warmup_ratio=args.lr_warmup_proportion)
    elif args.lr_decay_policy == "noam":
        lr_policy = InverseSquareRootAnnealing(
            args.num_epochs * steps_per_epoch,
            warmup_ratio=args.lr_warmup_proportion)
    else:
        raise NotImplementedError

    # save config file
    if not os.path.exists(args.checkpoint_directory):
        os.makedirs(args.checkpoint_directory)
    config_path = os.path.join(args.checkpoint_directory, "bert-config.json")
    if not os.path.exists(config_path):
        bert_model.config.to_json_file(config_path)
def main():
    parser = argparse.ArgumentParser(parents=[nm_argparse.NemoArgParser()],
                                     description='AN4 ASR',
                                     conflict_handler='resolve')

    # Overwrite default args
    parser.add_argument("--train_dataset", type=str,
                        help="training dataset path")
    parser.add_argument("--eval_datasets", type=str, nargs=1,
                        help="validation dataset path")

    # Create new args
    parser.add_argument("--lm", default="./an4-lm.3gram.binary", type=str)
    parser.add_argument("--test_after_training", action='store_true')
    parser.add_argument("--momentum", type=float)
    parser.add_argument("--beta1", default=0.95, type=float)
    parser.add_argument("--beta2", default=0.25, type=float)
    parser.set_defaults(
        model_config="./configs/jasper_an4.yaml",
        train_dataset="/home/mrjenkins/TestData/an4_dataset/an4_train.json",
        eval_datasets="/home/mrjenkins/TestData/an4_dataset/an4_val.json",
        work_dir="./tmp",
        optimizer="novograd",
        num_epochs=50,
        batch_size=48,
        eval_batch_size=64,
        lr=0.02,
        weight_decay=0.005,
        checkpoint_save_freq=1000,
        eval_freq=100,
        amp_opt_level="O1")

    args = parser.parse_args()
    betas = (args.beta1, args.beta2)

    wer_thr = 0.20
    beam_wer_thr = 0.15

    nf = nemo.core.NeuralModuleFactory(local_rank=args.local_rank,
                                       files_to_copy=[__file__],
                                       optimization_level=args.amp_opt_level,
                                       random_seed=0,
                                       log_dir=args.work_dir,
                                       create_tb_writer=True,
                                       cudnn_benchmark=args.cudnn_benchmark)
    tb_writer = nf.tb_writer
    checkpoint_dir = nf.checkpoint_dir

    # Load model definition
    yaml = YAML(typ="safe")
    with open(args.model_config) as f:
        jasper_params = yaml.load(f)

    (loss, eval_tensors, callbacks, total_steps, vocab,
     log_probs_e, encoded_len_e) = create_dags(jasper_params, args, nf)

    nf.train(
        tensors_to_optimize=[loss],
        callbacks=callbacks,
        optimizer=args.optimizer,
        lr_policy=CosineAnnealing(total_steps=total_steps,
                                  min_lr=args.lr / 100),
        optimization_params={
            "num_epochs": args.num_epochs,
            "max_steps": args.max_steps,
            "lr": args.lr,
            "momentum": args.momentum,
            "betas": betas,
            "weight_decay": args.weight_decay,
            "grad_norm_clip": None
        },
        batches_per_step=args.iter_per_step,
        amp_max_loss_scale=256.,
        # synced_batchnorm=(nf.global_rank is not None),
    )

    if args.test_after_training:
        nemo.logging.info("Testing greedy and beam search with LM WER.")
        # Create BeamSearch NM
        if nf.world_size > 1:
            nemo.logging.warning("Skipping beam search WER as it does not "
                                 "work if doing distributed training.")
        else:
            beam_search_with_lm = nemo_asr.BeamSearchDecoderWithLM(
                vocab=vocab,
                beam_width=64,
                alpha=2.,
                beta=1.5,
                lm_path=args.lm,
                num_cpus=max(os.cpu_count(), 1))
            beam_predictions = beam_search_with_lm(
                log_probs=log_probs_e, log_probs_length=encoded_len_e)
            eval_tensors.append(beam_predictions)

        evaluated_tensors = nf.infer(eval_tensors)
        if nf.global_rank in [0, None]:
            greedy_hypotheses = post_process_predictions(
                evaluated_tensors[1], vocab)
            references = post_process_transcripts(evaluated_tensors[2],
                                                  evaluated_tensors[3], vocab)
            wer = word_error_rate(hypotheses=greedy_hypotheses,
                                  references=references)
            nemo.logging.info("Greedy WER: {:.2f}%".format(wer * 100))
            if wer > wer_thr:
                nf.sync_all_processes(False)
                raise ValueError(f"Final eval greedy WER {wer * 100:.2f}% "
                                 f"exceeds {wer_thr * 100:.2f}%")
        nf.sync_all_processes()

        if nf.world_size == 1:
            beam_hypotheses = []
            # Over mini-batch
            for i in evaluated_tensors[-1]:
                # Over samples
                for j in i:
                    beam_hypotheses.append(j[0][1])

            beam_wer = word_error_rate(hypotheses=beam_hypotheses,
                                       references=references)
            nemo.logging.info("Beam WER {:.2f}%".format(beam_wer * 100))
            assert beam_wer <= beam_wer_thr, (
                "Final eval beam WER {:.2f}% exceeds {:.2f}%".format(
                    beam_wer * 100, beam_wer_thr * 100))
            assert beam_wer <= wer, (
                "Final eval beam WER exceeds the greedy WER.")

        # Reload model weights and train for extra 10 epochs
        checkpointer_callback = nemo.core.CheckpointCallback(
            folder=checkpoint_dir,
            step_freq=args.checkpoint_save_freq,
            force_load=True)

        # Distributed Data Parallel changes the underlying class so we need
        # to reinstantiate Encoder and Decoder
        args.num_epochs += 10
        previous_step_count = total_steps
        loss, eval_tensors, callbacks, total_steps, vocab, _, _ = create_dags(
            jasper_params, args, nf)

        nf.reset_trainer()
        nf.train(
            tensors_to_optimize=[loss],
            callbacks=callbacks,
            optimizer=args.optimizer,
            lr_policy=CosineAnnealing(warmup_steps=previous_step_count,
                                      total_steps=total_steps),
            optimization_params={
                "num_epochs": args.num_epochs,
                "lr": args.lr / 100,
                "momentum": args.momentum,
                "betas": betas,
                "weight_decay": args.weight_decay,
                "grad_norm_clip": None
            },
            reset=True,
            amp_max_loss_scale=256.,
            # synced_batchnorm=(nf.global_rank is not None),
        )

        evaluated_tensors = nf.infer(eval_tensors)
        if nf.global_rank in [0, None]:
            greedy_hypotheses = post_process_predictions(
                evaluated_tensors[1], vocab)
            references = post_process_transcripts(evaluated_tensors[2],
                                                  evaluated_tensors[3], vocab)
            wer_new = word_error_rate(hypotheses=greedy_hypotheses,
                                      references=references)
            nemo.logging.info("New greedy WER: {:.2f}%".format(wer_new * 100))
            if wer_new > wer * 1.1:
                nf.sync_all_processes(False)
                raise ValueError(
                    f"Fine tuning: new WER {wer_new * 100:.2f}% exceeds the "
                    f"previous WER {wer * 100:.2f}%")
        nf.sync_all_processes()

        # Open the log file and ensure that epochs are strictly increasing
        if nf._exp_manager.log_file:
            epochs = []
            with open(nf._exp_manager.log_file, "r") as log_file:
                line = log_file.readline()
                while line:
                    index = line.find("Starting epoch")
                    if index != -1:
                        epochs.append(
                            int(line[index + len("Starting epoch"):]))
                    line = log_file.readline()
            for i, e in enumerate(epochs):
                if i != e:
                    raise ValueError(
                        "Epochs in the log file are not strictly increasing")
            }
        })

    train_data_size = len(train_data_layer)
    steps_per_epoch = int(train_data_size / (args.batch_size * args.num_gpus))
    print("steps_per_epoch =", steps_per_epoch)

    callback_eval = nemo.core.EvaluatorCallback(
        eval_tensors=[eval_logits, eval_seq_ids],
        user_iter_callback=lambda x, y: eval_iter_callback(
            x, y, eval_data_layer, tag_ids),
        user_epochs_done_callback=lambda x: eval_epochs_done_callback(
            x, tag_ids, args.output_filename),
        tb_writer=tb_writer,
        eval_step=steps_per_epoch)

    if args.lr_policy == "lr_warmup":
        lr_policy_func = WarmupAnnealing(args.num_epochs * steps_per_epoch,
                                         warmup_ratio=args.lr_warmup_proportion)
    elif args.lr_policy == "lr_poly":
        lr_policy_func = SquareAnnealing(args.num_epochs * steps_per_epoch)
    elif args.lr_policy == "lr_cosine":
        lr_policy_func = CosineAnnealing(args.num_epochs * steps_per_epoch)
    else:
        raise ValueError(
            "Invalid lr_policy, must be lr_warmup, lr_poly or lr_cosine")

    optimizer.train(tensors_to_optimize=[train_loss],
                    callbacks=[callback_train, callback_eval],
                    lr_policy=lr_policy_func)
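# Worked example of the steps_per_epoch arithmetic above, with assumed,
# purely illustrative numbers (50,000 samples, batch_size=32, 4 GPUs):
train_data_size = 50_000
steps_per_epoch = int(train_data_size / (32 * 4))  # 390 optimizer steps per epoch
total_schedule_steps = 10 * steps_per_epoch        # 3,900 steps for a 10-epoch run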
def main():
    # Usage and Command line arguments
    parser = ArgumentParser()
    parser.add_argument(
        "--asr_model",
        type=str,
        default="QuartzNet15x5-En",
        required=True,
        help="Pass: '******', 'QuartzNet15x5-Zh', or 'JasperNet10x5-En' to "
             "train from pre-trained models. To train from scratch pass path "
             "to a model file ending with .yaml.",
    )
    parser.add_argument(
        "--amp_opt_level",
        default="O0",
        type=str,
        choices=["O0", "O1", "O2", "O3"],
        help="See: https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--train_dataset", type=str, required=True,
                        default=None, help="training dataset path")
    parser.add_argument("--eval_datasets", type=str, nargs="*",
                        help="evaluation datasets paths")
    parser.add_argument("--eval_freq", default=1000, type=int,
                        help="Evaluation frequency")
    parser.add_argument("--eval_batch_size", type=int, default=8,
                        help="batch size to use for evaluation")
    parser.add_argument("--local_rank", default=None, type=int,
                        help="node rank for distributed training")
    parser.add_argument("--stats_freq", default=25, type=int,
                        help="frequency with which to update train stats")
    parser.add_argument("--checkpoint_dir", default=None, type=str,
                        help="Folder where to save checkpoints")
    parser.add_argument("--checkpoint_save_freq", required=False, type=int,
                        help="how often to checkpoint")
    parser.add_argument("--optimizer", default="novograd", type=str)
    parser.add_argument("--warmup_ratio", default=0.02, type=float,
                        help="learning rate warmup ratio")
    parser.add_argument("--batch_size", required=True, type=int,
                        help="train batch size per GPU")
    parser.add_argument("--num_epochs", default=5, type=int,
                        help="number of epochs to train")
    parser.add_argument("--lr", default=0.01, type=float)
    parser.add_argument("--beta1", default=0.95, type=float)
    parser.add_argument("--beta2", default=0.5, type=float)
    parser.add_argument("--weight_decay", default=0.001, type=float)
    parser.add_argument("--iter_per_step", default=1, type=int,
                        help="number of grad accumulations per batch")
    parser.add_argument("--wandb_exp_name", default=None, type=str)
    parser.add_argument("--wandb_project", default=None, type=str)
    parser.add_argument("--max_train_audio_len", default=16.7, type=float,
                        help="max audio length")
    parser.add_argument("--do_not_trim_silence", action="store_false",
                        help="Add this flag to disable silence trimming")
    parser.add_argument("--do_not_normalize_text", action="store_false",
                        help="Add this flag to disable text normalization "
                             "(e.g. for non-English).")
    args = parser.parse_args()

    # Setup NeuralModuleFactory to control training
    # instantiate Neural Factory with supported backend
    nf = nemo.core.NeuralModuleFactory(
        local_rank=args.local_rank,  # necessary for distributed training
        optimization_level=args.amp_opt_level,  # necessary for mixed precision
        cudnn_benchmark=True,
    )

    # Instantiate the model which we'll train
    if args.asr_model.endswith('.yaml'):
        logging.info(
            f"Speech2Text: Will train from scratch using config from "
            f"{args.asr_model}")
        asr_model = nemo_asr.models.ASRConvCTCModel.import_from_config(
            args.asr_model)
    else:
        logging.info(f"Speech2Text: Will fine-tune from {args.asr_model}")
        asr_model = nemo_asr.models.ASRConvCTCModel.from_pretrained(
            model_info=args.asr_model, local_rank=args.local_rank)

    if args.asr_model.strip().endswith('-Zh'):
        logging.info('USING CER')
        eval_metric = 'CER'
    else:
        eval_metric = 'WER'

    logging.info("\n\n")
    logging.info(f"Speech2Text: Training on {nf.world_size} GPUs.")
    logging.info(f"Training {type(asr_model)} model.")
    logging.info(f"Training CTC model with alphabet {asr_model.vocabulary}.")
    logging.info(
        f"Training CTC model with {asr_model.num_weights} weights.\n\n")

    train_data_layer = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.train_dataset,
        labels=asr_model.vocabulary,
        batch_size=args.batch_size,
        trim_silence=args.do_not_trim_silence,
        max_duration=args.max_train_audio_len,
        shuffle=True,
        normalize_transcripts=args.do_not_normalize_text,
    )
    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(asr_model.vocabulary))
    greedy_decoder = nemo_asr.GreedyCTCDecoder()

    audio_signal, audio_signal_len, transcript, transcript_len = \
        train_data_layer()
    log_probs, encoded_len = asr_model(input_signal=audio_signal,
                                       length=audio_signal_len)
    predictions = greedy_decoder(log_probs=log_probs)
    loss = ctc_loss(log_probs=log_probs,
                    targets=transcript,
                    input_length=encoded_len,
                    target_length=transcript_len)

    # Callbacks which we'll be using:
    callbacks = []
    # SimpleLossLogger prints basic training stats (e.g. loss) to console
    train_callback = nemo.core.SimpleLossLoggerCallback(
        tensors=[loss, predictions, transcript, transcript_len],
        step_freq=args.stats_freq,
        print_func=partial(monitor_asr_train_progress,
                           labels=asr_model.vocabulary,
                           eval_metric=eval_metric),
    )
    callbacks.append(train_callback)

    if args.checkpoint_dir is not None and args.checkpoint_save_freq is not None:
        # Checkpoint callback saves checkpoints periodically
        checkpointer_callback = nemo.core.CheckpointCallback(
            folder=args.checkpoint_dir, step_freq=args.checkpoint_save_freq)
        callbacks.append(checkpointer_callback)

    if args.wandb_exp_name is not None and args.wandb_project is not None:
        # WandbCallback saves stats to Weights&Biases
        wandb_callback = nemo.core.WandBLogger(
            step_freq=args.stats_freq,
            wandb_name=args.wandb_exp_name,
            wandb_project=args.wandb_project,
            args=args)
        callbacks.append(wandb_callback)

    # Evaluation
    if args.eval_datasets is not None and args.eval_freq is not None:
        asr_model.eval()  # switch model to evaluation mode
        logging.info(f"Will perform evaluation every {args.eval_freq} steps.")
        for ind, eval_dataset in enumerate(args.eval_datasets):
            eval_data_layer = nemo_asr.AudioToTextDataLayer(
                manifest_filepath=eval_dataset,
                labels=asr_model.vocabulary,
                batch_size=args.eval_batch_size,
                normalize_transcripts=args.do_not_normalize_text,
            )
            audio_signal, audio_signal_len, transcript, transcript_len = \
                eval_data_layer()
            log_probs, encoded_len = asr_model(input_signal=audio_signal,
                                               length=audio_signal_len)
            eval_predictions = greedy_decoder(log_probs=log_probs)
            eval_loss = ctc_loss(log_probs=log_probs,
                                 targets=transcript,
                                 input_length=encoded_len,
                                 target_length=transcript_len)
            tag_name = os.path.basename(eval_dataset).split(".")[0]
            eval_callback = nemo.core.EvaluatorCallback(
                eval_tensors=[eval_loss, eval_predictions,
                              transcript, transcript_len],
                user_iter_callback=partial(process_evaluation_batch,
                                           labels=asr_model.vocabulary),
                user_epochs_done_callback=partial(process_evaluation_epoch,
                                                  tag=tag_name,
                                                  eval_metric=eval_metric),
                eval_step=args.eval_freq,
                wandb_name=args.wandb_exp_name,
                wandb_project=args.wandb_project,
            )
            callbacks.append(eval_callback)

    steps_in_epoch = len(train_data_layer) / (
        args.batch_size * args.iter_per_step * nf.world_size)
    lr_policy = CosineAnnealing(total_steps=args.num_epochs * steps_in_epoch,
                                warmup_ratio=args.warmup_ratio)
    nf.train(
        tensors_to_optimize=[loss],
        callbacks=callbacks,
        optimizer=args.optimizer,
        optimization_params={
            "num_epochs": args.num_epochs,
            "lr": args.lr,
            "betas": (args.beta1, args.beta2),
            "weight_decay": args.weight_decay,
        },
        batches_per_step=args.iter_per_step,
        lr_policy=lr_policy,
    )
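# Note on the two `store_false` flags above: argparse defaults such arguments
# to True and flips them to False when the flag is passed, so the attribute
# name reads inverted relative to its value. A minimal demonstration:
import argparse

p = argparse.ArgumentParser()
p.add_argument("--do_not_trim_silence", action="store_false")
assert p.parse_args([]).do_not_trim_silence is True   # trimming on by default
assert p.parse_args(
    ["--do_not_trim_silence"]).do_not_trim_silence is False  # flag disables it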
    if args.cl:
        classify_callback = RunClassifierCallback(
            eval_step=100,
            name=args.name,
            num_classes=len(labels),
            gpu=args.classify_gpu,
            hidden_size=args.hidden_size,
            manifest=args.manifest,
            model=args.model)
        callbacks.append(classify_callback)

    lr_policy = CosineAnnealing(
        total_steps=num_epochs * steps_per_epoch,
        warmup_ratio=0.05,
        min_lr=args.lr_end,
    )
    logging.info(f"Using `{lr_policy}` Learning Rate Scheduler")

    neural_factory.train(
        tensors_to_optimize=[train_loss],
        callbacks=callbacks,
        lr_policy=lr_policy,
        optimizer="novograd",
        optimization_params={
            "num_epochs": num_epochs,
            "max_steps": None,
            "lr": lr,
            "momentum": 0.95,
    eval_callback = nemo.core.EvaluatorCallback(
        eval_tensors=[loss_v, predictions_v, transcript_v, transcript_len_v],
        # how to process an evaluation batch - e.g. compute WER
        user_iter_callback=partial(process_evaluation_batch, labels=labels),
        # how to aggregate statistics (e.g. WER) for the evaluation epoch
        user_epochs_done_callback=partial(process_evaluation_epoch,
                                          tag="DEV-CLEAN",
                                          logger=logger),
        eval_step=500,
        tb_writer=tb_writer)

    # Run training using your Neural Factory.
    # Once this "action" is called, data starts flowing along the train and
    # eval DAGs and computations start to happen.
    nf.train(
        # Specify the loss to optimize for
        tensors_to_optimize=[loss],
        # Specify which callbacks you want to run
        callbacks=[train_callback, eval_callback, saver_callback],
        # Specify what optimizer to use
        optimizer="novograd",
        # Specify optimizer parameters such as num_epochs and lr
        optimization_params={
            "num_epochs": 100,
            "lr": 0.02,
            "weight_decay": 1e-4,
            "grad_norm_clip": None
        },
        batches_per_step=8,
        lr_policy=CosineAnnealing(
            100 * int(len(data_layer._dataset) / (16. * 8)),
            warmup_steps=1000))
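# The step count handed to CosineAnnealing above is epochs * dataset_size /
# effective_batch, where the effective batch is the per-GPU batch (16) times
# batches_per_step (8). With an assumed, purely illustrative dataset of
# 28,539 samples:
total_steps = 100 * int(28_539 / (16. * 8))  # 100 * 222 = 22,200 steps
# warmup_steps=1000 then covers roughly the first 4.5% of training.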