def create_infer_dags(
    neural_factory,
    neural_modules,
    tacotron2_params,
    infer_dataset,
    infer_batch_size,
    cpu_per_dl=1,
):
    """Assemble the Tacotron 2 inference DAG and return its output tensors.

    Args:
        neural_factory: NeMo neural factory (unused here; kept for API parity
            with the training-side DAG builders).
        neural_modules: 7-tuple of instantiated modules; positions 1-4 are
            (text_embedding, encoder, decoder, postnet) — the rest are ignored.
        tacotron2_params: dict with at least a 'labels' list; special token
            ids (bos/eos/pad) are derived from its length.
        infer_dataset: path to the transcript dataset to run inference on.
        infer_batch_size: batch size for the inference data layer.
        cpu_per_dl: worker count for the data layer (default 1).

    Returns:
        [mel_postnet, gate, alignments, mel_len] output tensors.

    Raises:
        ValueError: if the decoder module is not a Tacotron2DecoderInfer.
    """
    _, text_embedding, t2_enc, t2_dec, t2_postnet, _, _ = neural_modules

    labels = tacotron2_params['labels']
    # bos/eos/pad ids are appended after the label vocabulary.
    data_layer = nemo_asr.TranscriptDataLayer(
        path=infer_dataset,
        labels=labels,
        batch_size=infer_batch_size,
        num_workers=cpu_per_dl,
        bos_id=len(labels),
        eos_id=len(labels) + 1,
        pad_id=len(labels) + 2,
        shuffle=False,
    )

    transcript, transcript_len = data_layer()
    embedded = text_embedding(char_phone=transcript)
    encoded = t2_enc(
        char_phone_embeddings=embedded,
        embedding_length=transcript_len,
    )

    # Only the inference variant of the decoder is supported here.
    if not isinstance(t2_dec, nemo_tts.Tacotron2DecoderInfer):
        raise ValueError("The Neural Module for tacotron2 decoder was not understood")
    mel_decoder, gate, alignments, mel_len = t2_dec(
        char_phone_encoded=encoded,
        encoded_length=transcript_len,
    )

    mel_postnet = t2_postnet(mel_input=mel_decoder)
    return [mel_postnet, gate, alignments, mel_len]
def create_dag(args, cfg, num_gpus):
    """Build the training and evaluation DAGs for the decoder LM.

    Args:
        args: parsed CLI arguments; reads train_dataset, eval_datasets,
            checkpoint_dir, checkpoint_save_freq.
        cfg: configuration dict; reads 'target', 'optimization', 'inference',
            and 'DecoderRNN' sections. Mutated in place with 'num_params',
            'input', and derived optimization step counts.
        num_gpus: unused here; kept for call-site compatibility.

    Returns:
        ((train_loss, evals), cfg, callbacks) where evals pairs the eval
        dataset path with its (loss, log_probs, texts) tensors and callbacks
        holds the teacher-forcing scheduler and the module saver.
    """
    # Defining nodes
    data = nemo_asr.TranscriptDataLayer(
        path=args.train_dataset,
        labels=cfg['target']['labels'],
        eos_id=cfg['target']['eos_id'],
        pad_id=cfg['target']['pad_id'],
        batch_size=cfg['optimization']['batch_size'],
        drop_last=True,
    )
    data_eval = nemo_asr.AudioToTextDataLayer(
        manifest_filepath=args.eval_datasets,
        labels=cfg['target']['labels'],
        eos_id=cfg['target']['eos_id'],
        batch_size=cfg['inference']['batch_size'],
        load_audio=False,
    )
    decoder = nemo.backends.pytorch.DecoderRNN(
        voc_size=len(cfg['target']['labels']),
        bos_id=cfg['target']['bos_id'],
        **cfg['DecoderRNN'],
    )

    num_data = len(data)
    batch_size = cfg['optimization']['batch_size']
    num_epochs = cfg['optimization']['params']['num_epochs']
    # Floor division is exact for large num_data; int(num_data / batch_size)
    # goes through a float and can round incorrectly.
    steps_per_epoch = num_data // batch_size
    total_steps = num_epochs * steps_per_epoch

    vsc = ValueSetterCallback
    # Teacher forcing held constant at 1.0 for the whole run.
    tf_callback = ValueSetterCallback(
        decoder,
        'teacher_forcing',
        policies=[vsc.Policy(vsc.Method.Const(1.0), start=0.0, end=1.0)],
        total_steps=total_steps,
    )
    seq_loss = nemo.backends.pytorch.SequenceLoss(
        pad_id=cfg['target']['pad_id'],
        smoothing_coef=cfg['optimization']['smoothing_coef'],
    )
    saver_callback = nemo.core.ModuleSaverCallback(
        save_modules_list=[decoder],
        folder=args.checkpoint_dir,
        step_freq=args.checkpoint_save_freq,
    )

    # Creating DAG
    texts, _ = data()
    log_probs, _ = decoder(targets=texts)
    train_loss = seq_loss(log_probs=log_probs, targets=texts)

    evals = []
    # AudioToTextDataLayer yields (audio, audio_len, texts, texts_len);
    # only the transcripts are needed since load_audio=False.
    _, _, texts, _ = data_eval()
    log_probs, _ = decoder(targets=texts)
    eval_loss = seq_loss(log_probs=log_probs, targets=texts)
    evals.append((args.eval_datasets, (eval_loss, log_probs, texts)))

    # Update config
    cfg['num_params'] = {'decoder': decoder.num_weights}
    cfg['num_params']['total'] = sum(cfg['num_params'].values())
    cfg['input']['train'] = {'num_data': num_data}
    cfg['optimization']['steps_per_epoch'] = steps_per_epoch
    cfg['optimization']['total_steps'] = total_steps

    return (train_loss, evals), cfg, [tf_callback, saver_callback]