def test_network(a=1):
    model, _, _ = load_checkpoint(a)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cuda':
        print('Testing model on GPU', "\n")
    model.to(device)

    loss_fn = nn.NLLLoss()
    _, _, testloader = prepare_dataloaders()
    print('Starting Validation on Test Set')

    # validation on the test set
    model.eval()
    accuracy = 0
    test_loss = 0
    with torch.no_grad():
        for ii, (images, labels) in enumerate(testloader):
            inputs = images.to(device)
            labels = labels.to(device)
            output = model(inputs)
            test_loss += loss_fn(output, labels).item()
            ps = torch.exp(output)  # model outputs log-probabilities; exponentiate to get class probabilities
            equality = (labels == ps.max(1)[1])
            accuracy += equality.float().mean().item()
            print(ii, "Images tested")

    print("Test Loss: {:.3f}.. ".format(test_loss / len(testloader)),
          "Test Accuracy: {:.3f}".format(accuracy / len(testloader)))
def __init__(self, device=None, jit=False):
    self.device = device
    self.jit = jit
    self.opt = Namespace(**{
        'batch_size': 128,
        'd_inner_hid': 2048,
        'd_k': 64,
        'd_model': 512,
        'd_word_vec': 512,
        'd_v': 64,
        'data_pkl': 'm30k_deen_shr.pkl',
        'debug': '',
        'dropout': 0.1,
        'embs_share_weight': False,
        'epoch': 1,
        'label_smoothing': False,
        'log': None,
        'n_head': 8,
        'n_layers': 6,
        'n_warmup_steps': 128,
        'cuda': True,
        'proj_share_weight': False,
        'save_mode': 'best',
        'save_model': None,
        'script': False,
        'train_path': None,
        'val_path': None,
    })
    _, validation_data = prepare_dataloaders(self.opt, self.device)

    transformer = Transformer(
        self.opt.src_vocab_size,
        self.opt.trg_vocab_size,
        src_pad_idx=self.opt.src_pad_idx,
        trg_pad_idx=self.opt.trg_pad_idx,
        trg_emb_prj_weight_sharing=self.opt.proj_share_weight,
        emb_src_trg_weight_sharing=self.opt.embs_share_weight,
        d_k=self.opt.d_k,
        d_v=self.opt.d_v,
        d_model=self.opt.d_model,
        d_word_vec=self.opt.d_word_vec,
        d_inner=self.opt.d_inner_hid,
        n_layers=self.opt.n_layers,
        n_head=self.opt.n_head,
        dropout=self.opt.dropout).to(self.device)

    if self.jit:
        transformer = torch.jit.script(transformer)
    self.module = transformer

    batch = list(validation_data)[0]
    src_seq = patch_src(batch.src, self.opt.src_pad_idx).to(self.device)
    trg_seq, self.gold = map(lambda x: x.to(self.device),
                             patch_trg(batch.trg, self.opt.trg_pad_idx))
    # We use validation_data for training as well so that it can finish fast enough.
    self.example_inputs = (src_seq, trg_seq)
def test_dataloader():
    train_loader, valset, collate_fn = prepare_dataloaders(hparams)
    for i, batch in enumerate(train_loader):
        print(batch)
        break
                    default=1,
                    help="""If verbose is set, will output the n_best decoded sentences""")
parser.add_argument('-return_attns', action='store_true')
parser.add_argument('-debug', action='store_true')
parser.add_argument('-no_cuda', action='store_true')
parser.add_argument('-dev', action='store_true')

opt = parser.parse_args()
assert opt.dev or opt.out is not None, \
    "You are not in dev mode but you don't have an output file"
opt.cuda = not opt.no_cuda

#========= Loading Dataset =========#
data = torch.load(opt.data)
training_data, validation_data = prepare_dataloaders(data, opt)

attributor = Attribution(opt)
#attributor.attributor_batch_beam(validation_data, opt)
if opt.dev:
    if opt.debug:
        IG, tgt_IG, src_seq, translated_sentence, tgt_trans_sent = attributor.attribute_batch(
            validation_data, dev=True, debug=True)
        right_line = [
            validation_data.dataset.tgt_idx2word[idx.item()]
            for idx in tgt_trans_sent
        ]
    else:
        IG, src_seq, translated_sentence = attributor.attribute_batch(
            validation_data, dev=True)
        original_line = [
parser.add_argument('-dropout', type=float, default=0.1)
parser.add_argument('-embs_share_weight', action='store_true')
parser.add_argument('-proj_share_weight', action='store_true')
parser.add_argument('-log', default=None)
parser.add_argument('-save_model', default=None)
parser.add_argument('-save_mode', type=str, choices=['all', 'best'], default='best')
parser.add_argument('-no_cuda', action='store_true')
parser.add_argument('-label_smoothing', action='store_true')

opt = parser.parse_args()
device = 'cpu'
training_data, validation_data = prepare_dataloaders(opt, device)

from seq2seq.RecurrentSeq2Seq import Encoder, Decoder, Seq2Seq
encoder = Encoder(input_dim=opt.src_vocab_size)
decoder = Decoder(output_dim=opt.trg_vocab_size)
model = Seq2Seq(encoder, decoder, device)

for batch in training_data:
    src = batch.src  # [seq_src, bs]
    trg = batch.trg  # [seq_trg, bs]
    src_seq = patch_src(batch.src, opt.src_pad_idx).to(device)
    trg_seq, gold = map(lambda x: x.to(device),
                        patch_trg(batch.trg, opt.trg_pad_idx))
    break
def GTA_Synthesis(hparams, args, extra_info='', audio_offset=0):
    """Generate Ground-Truth-Aligned Spectrograms for Training WaveGlow."""
    rank = args.rank
    n_gpus = args.n_gpus
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)

    if args.use_validation_files:
        filelisttype = "val"
        hparams.training_files = hparams.validation_files
    else:
        filelisttype = "train"

    # initialize blank model
    print('Initializing Tacotron2...')
    model = load_model(hparams)
    print('Done')
    global model_args
    model_args = get_args(model.forward)
    model.eval()

    # Load checkpoint
    assert args.checkpoint_path is not None
    print('Loading Tacotron2 Checkpoint...')
    model = warm_start_model(args.checkpoint_path, model)
    print('Done')
    _ = model.train() if args.use_training_mode else model.eval()  # set model to either train() or eval() mode. (controls dropout + DFR)

    print("Initializing AMP Model")
    if hparams.fp16_run:
        model = amp.initialize(model, opt_level='O2')
    print('Done')

    # define datasets/dataloaders
    train_loader, valset, collate_fn, train_sampler, trainset = prepare_dataloaders(
        hparams, model_args, args, None, audio_offset=audio_offset)

    # load and/or generate global_mean
    if args.use_training_mode and hparams.drop_frame_rate > 0.:
        if rank != 0:  # if global_mean not yet calculated, wait for main thread to do it
            while not os.path.exists(hparams.global_mean_npy):
                time.sleep(1)
        hparams.global_mean = get_global_mean(train_loader, hparams.global_mean_npy, hparams)

    # ================ MAIN TRAINING LOOP! ===================
    os.makedirs(os.path.join(args.output_directory), exist_ok=True)
    f = open(os.path.join(args.output_directory, f'map_{filelisttype}_gpu{rank}.txt'), 'w', encoding='utf-8')
    processed_files = 0
    failed_files = 0
    duration = time.time()
    total = len(train_loader)
    rolling_sum = StreamingMovingAverage(100)
    for i, y in enumerate(train_loader):
        y_gpu = model.parse_batch(y)  # move batch to GPU
        y_pred_gpu = force(model, valid_kwargs=model_args,
                           **{**y_gpu, "teacher_force_till": 0, "p_teacher_forcing": 1.0, "drop_frame_rate": 0.0})
        y_pred = {k: v.cpu() for k, v in y_pred_gpu.items() if v is not None}  # move model outputs to CPU
        if args.fp16_save:
            y_pred = {k: v.half() for k, v in y_pred.items()}  # convert model outputs to fp16

        if args.save_letter_alignments or args.save_phone_alignments:
            alignments = get_alignments(y_pred['alignments'], y['mel_lengths'], y['text_lengths'])  # [B, mel_T, txt_T] -> [[B, mel_T, txt_T], [B, mel_T, txt_T], ...]

        offset_append = '' if audio_offset == 0 else str(audio_offset)
        for j in range(len(y['gt_mel'])):
            gt_mel   = y['gt_mel'][j, :, :y['mel_lengths'][j]]
            pred_mel = y_pred['pred_mel_postnet'][j, :, :y['mel_lengths'][j]]
            audiopath = y['audiopath'][j]
            speaker_id_ext = y['speaker_id_ext'][j]

            if True or (args.max_mse or args.max_mae):
                MAE = F.l1_loss(pred_mel, gt_mel).item()
                MSE = F.mse_loss(pred_mel, gt_mel).item()
                if args.max_mse and MSE > args.max_mse:
                    print(f"MSE ({MSE}) is greater than max MSE ({args.max_mse}).\nFilepath: '{audiopath}'\n")
                    failed_files += 1; continue
                if args.max_mae and MAE > args.max_mae:
                    print(f"MAE ({MAE}) is greater than max MAE ({args.max_mae}).\nFilepath: '{audiopath}'\n")
                    failed_files += 1; continue
            else:
                MAE = MSE = 'N/A'

            print(f"PATH: '{audiopath}'\nMel Shape:{list(gt_mel.shape)}\nSpeaker_ID: {speaker_id_ext}\nMSE: {MSE}\nMAE: {MAE}")

            if not args.do_not_save_mel:
                pred_mel_path = os.path.splitext(audiopath)[0] + '.pred_mel.pt'
                torch.save(pred_mel.clone(), pred_mel_path)
                pm_audio_path = os.path.splitext(audiopath)[0] + '.pm_audio.pt'  # predicted mel audio
                torch.save(y['gt_audio'][j, :y['audio_lengths'][j]].clone(), pm_audio_path)
            if args.save_letter_alignments and hparams.p_arpabet == 0.:
                save_path_align_out = os.path.splitext(audiopath)[0] + '_galign.pt'
                torch.save(alignments[j].clone(), save_path_align_out)
            if args.save_phone_alignments and hparams.p_arpabet == 1.:
                save_path_align_out = os.path.splitext(audiopath)[0] + '_palign.pt'
                torch.save(alignments[j].clone(), save_path_align_out)
            map = f"{audiopath}|{y['gtext_str'][j]}|{speaker_id_ext}|\n"
            f.write(map)  # write paths to text file
            processed_files += 1
        print("")

        duration = time.time() - duration
        avg_duration = rolling_sum.process(duration)
        time_left = round(((total - i) * avg_duration) / 3600, 2)
        print(f'{extra_info}{i}/{total} compute and save GTA melspectrograms in {i}th batch, {duration}s, {time_left}hrs left. {processed_files} processed, {failed_files} failed.')
        duration = time.time()
    f.close()

    if n_gpus > 1:
        torch.distributed.barrier()  # wait till all graphics cards reach this point.

    # merge all generated filelists from every GPU
    filenames = [f'map_{filelisttype}_gpu{j}.txt' for j in range(n_gpus)]
    if rank == 0:
        with open(os.path.join(args.output_directory, f'map_{filelisttype}.txt'), 'w') as outfile:
            for fname in filenames:
                with open(os.path.join(args.output_directory, fname)) as infile:
                    for line in infile:
                        if len(line.strip()):
                            outfile.write(line)
if __name__ == '__main__':
    N = 68669  # hard coded because loading the cache is a pain
    cv = model_selection.KFold(n_splits=10)
    cache_path = '~/ct/ct-nlp-data/NLPPipeline_dsk_cache.pbz'
    jobs = ((cache_path, i_split, trn, tst)
            for i_split, (trn, tst) in enumerate(cv.split(list(range(N)))))
    parallel = joblib.Parallel(n_jobs=2)
    data_files = parallel(
        joblib.delayed(generate_split_data)(*job) for job in jobs)

    for i_split, datafile in enumerate(data_files):
        data = torch.load(datafile)
        args = train.parser.parse_args()
        trn_data, val_data = \
            train.prepare_dataloaders(data, batch_size=args.batch_size)
        src_vocab_size = trn_data.dataset.src_vocab_size
        tgt_vocab_size = trn_data.dataset.tgt_vocab_size
        DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        mdl = Transformer(
            src_vocab_size,
            tgt_vocab_size,
            # include <BOS> and <EOS>
            data['settings'].get('max_word_seq_len', 200) + 2,
            tgt_emb_prj_weight_sharing=args.proj_share_weight,
            emb_src_tgt_weight_sharing=args.embs_share_weight,
            d_k=args.d_k,
            d_v=args.d_v,
            d_model=args.d_model,
            d_word_vec=args.d_word_vec,