Example 1
def test_network(a=1):

    model, _, _ = load_checkpoint(a)

    # move the model to the GPU when one is available and remember the
    # device so the inputs can be sent to the same place
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    print('Evaluating model on', device, "\n")

    loss_fn = nn.NLLLoss()

    _, _, testloader = prepare_dataloaders()

    print('Starting Validation on Test Set')
    # validation on the test set
    model.eval()
    accuracy = 0
    test_loss = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for ii, (images, labels) in enumerate(testloader):
            inputs = images.to(device)
            labels = labels.to(device)
            output = model(inputs)
            test_loss += loss_fn(output, labels).item()
            # the model returns log-probabilities (NLLLoss), so exp() gives probabilities
            ps = torch.exp(output)
            equality = (labels == ps.max(dim=1)[1])
            accuracy += equality.float().mean().item()
            print(ii, "Images tested")

    print("Test Loss: {:.3f}.. ".format(test_loss / len(testloader)),
          "Test Accuracy: {:.3f}".format(accuracy / len(testloader)))
Example 2
    def __init__(self, device=None, jit=False):
        self.device = device
        self.jit = jit
        self.opt = Namespace(**{
            'batch_size': 128,
            'd_inner_hid': 2048,
            'd_k': 64,
            'd_model': 512,
            'd_word_vec': 512,
            'd_v': 64,
            'data_pkl': 'm30k_deen_shr.pkl',
            'debug': '',
            'dropout': 0.1,
            'embs_share_weight': False,
            'epoch': 1,
            'label_smoothing': False,
            'log': None,
            'n_head': 8,
            'n_layers': 6,
            'n_warmup_steps': 128,
            'cuda': True,
            'proj_share_weight': False,
            'save_mode': 'best',
            'save_model': None,
            'script': False,
            'train_path': None,
            'val_path': None,
        })

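        # NOTE: prepare_dataloaders is assumed to also populate opt with the
        # fields read below (src_vocab_size, trg_vocab_size, src_pad_idx,
        # trg_pad_idx); they are not part of the Namespace defined above.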
        _, validation_data = prepare_dataloaders(self.opt, self.device)
        transformer = Transformer(
            self.opt.src_vocab_size,
            self.opt.trg_vocab_size,
            src_pad_idx=self.opt.src_pad_idx,
            trg_pad_idx=self.opt.trg_pad_idx,
            trg_emb_prj_weight_sharing=self.opt.proj_share_weight,
            emb_src_trg_weight_sharing=self.opt.embs_share_weight,
            d_k=self.opt.d_k,
            d_v=self.opt.d_v,
            d_model=self.opt.d_model,
            d_word_vec=self.opt.d_word_vec,
            d_inner=self.opt.d_inner_hid,
            n_layers=self.opt.n_layers,
            n_head=self.opt.n_head,
            dropout=self.opt.dropout).to(self.device)

        if self.jit:
            transformer = torch.jit.script(transformer)
        self.module = transformer

        batch = next(iter(validation_data))
        src_seq = patch_src(batch.src, self.opt.src_pad_idx).to(self.device)
        trg_seq, self.gold = map(lambda x: x.to(self.device),
                                 patch_trg(batch.trg, self.opt.trg_pad_idx))
        # We use validation_data for training as well so that it can finish fast enough.
        self.example_inputs = (src_seq, trg_seq)
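A hedged usage sketch, assuming the __init__ above belongs to a benchmark wrapper class (called Model here purely for illustration) and that the Transformer's forward takes (src_seq, trg_seq):

device = 'cuda' if torch.cuda.is_available() else 'cpu'
benchmark = Model(device=device, jit=False)  # hypothetical owner of the __init__ above

benchmark.module.eval()
with torch.no_grad():
    src_seq, trg_seq = benchmark.example_inputs
    logits = benchmark.module(src_seq, trg_seq)  # one Transformer forward pass
print(logits.shape, benchmark.gold.shape)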
Example 3
def test_dataloader():
    train_loader, valset, collate_fn = prepare_dataloaders(hparams)
    for i, batch in enumerate(train_loader):
        print(batch)
        break
Example 4
                        default=1,
                        help="""If verbose is set, will output the n_best
                        decoded sentences""")
    parser.add_argument('-return_attns', action='store_true')
    parser.add_argument('-debug', action='store_true')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-dev', action='store_true')

    opt = parser.parse_args()
    assert opt.dev or opt.out is not None, "Not in dev mode and no output file was specified"
    opt.cuda = not opt.no_cuda

    #========= Loading Dataset =========#
    data = torch.load(opt.data)

    training_data, validation_data = prepare_dataloaders(data, opt)
    attributor = Attribution(opt)
    #attributor.attributor_batch_beam(validation_data,opt)
    if opt.dev:
        if opt.debug:
            IG, tgt_IG, src_seq, translated_sentence, tgt_trans_sent = attributor.attribute_batch(
                validation_data, dev=True, debug=True)
            right_line = [
                validation_data.dataset.tgt_idx2word[idx.item()]
                for idx in tgt_trans_sent
            ]
        else:
            IG, src_seq, translated_sentence = attributor.attribute_batch(
                validation_data, dev=True)

        original_line = [
Example 5
parser.add_argument('-dropout', type=float, default=0.1)
parser.add_argument('-embs_share_weight', action='store_true')
parser.add_argument('-proj_share_weight', action='store_true')

parser.add_argument('-log', default=None)
parser.add_argument('-save_model', default=None)
parser.add_argument('-save_mode', type=str, choices=['all', 'best'], default='best')

parser.add_argument('-no_cuda', action='store_true')
parser.add_argument('-label_smoothing', action='store_true')

opt = parser.parse_args()

device = 'cpu'

training_data, validation_data = prepare_dataloaders(opt, device)

from seq2seq.RecurrentSeq2Seq import Encoder, Decoder, Seq2Seq

encoder = Encoder(input_dim=opt.src_vocab_size)
decoder = Decoder(output_dim=opt.trg_vocab_size)
model = Seq2Seq(encoder, decoder, device)

for batch in training_data:
    src = batch.src # [seq_src, bs]
    trg = batch.trg # [seq_trg, bs]

    src_seq = patch_src(batch.src, opt.src_pad_idx).to(device)
    trg_seq, gold = map(lambda x: x.to(device), patch_trg(batch.trg, opt.trg_pad_idx))
    break
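patch_src and patch_trg are used here and in Example 2 but are not shown; a sketch of the behaviour assumed by the shape comments above (transpose to batch-first, then split the target into decoder input and flattened gold labels):

def patch_src(src, pad_idx):
    # [seq_src, bs] -> [bs, seq_src]
    return src.transpose(0, 1)

def patch_trg(trg, pad_idx):
    # [seq_trg, bs] -> [bs, seq_trg]; the decoder input drops the last token,
    # the gold labels drop the first token and are flattened for the loss
    trg = trg.transpose(0, 1)
    trg_input, gold = trg[:, :-1], trg[:, 1:].contiguous().view(-1)
    return trg_input, gold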
Example 6
def GTA_Synthesis(hparams, args, extra_info='', audio_offset=0):
    """Generate Ground-Truth-Aligned Spectrograms for Training WaveGlow."""
    rank   = args.rank
    n_gpus = args.n_gpus
    torch.manual_seed(hparams.seed)
    torch.cuda.manual_seed(hparams.seed)
    
    if args.use_validation_files:
        filelisttype = "val"
        hparams.training_files = hparams.validation_files
    else:
        filelisttype = "train"
    
    # initialize blank model
    print('Initializing Tacotron2...')
    model = load_model(hparams)
    print('Done')
    global model_args
    model_args = get_args(model.forward)
    model.eval()
    
    # Load checkpoint
    assert args.checkpoint_path is not None
    print('Loading Tacotron2 Checkpoint...')
    model = warm_start_model(args.checkpoint_path, model)
    print('Done')
    
    # set model to either train() or eval() mode (controls dropout + drop frame rate)
    _ = model.train() if args.use_training_mode else model.eval()
    
    print("Initializing AMP Model")
    if hparams.fp16_run:
        model = amp.initialize(model, opt_level='O2')
    print('Done')
    
    # define datasets/dataloaders
    train_loader, valset, collate_fn, train_sampler, trainset = prepare_dataloaders(hparams, model_args, args, None, audio_offset=audio_offset)
    
    # load and/or generate global_mean
    if args.use_training_mode and hparams.drop_frame_rate > 0.:
        if rank != 0: # if global_mean not yet calculated, wait for main thread to do it
            while not os.path.exists(hparams.global_mean_npy): time.sleep(1)
        hparams.global_mean = get_global_mean(train_loader, hparams.global_mean_npy, hparams)
    
    # ================ MAIN GENERATION LOOP! ===================
    os.makedirs(os.path.join(args.output_directory), exist_ok=True)
    f = open(os.path.join(args.output_directory, f'map_{filelisttype}_gpu{rank}.txt'),'w', encoding='utf-8')
    
    processed_files = 0
    failed_files = 0
    duration = time.time()
    total = len(train_loader)
    rolling_sum = StreamingMovingAverage(100)
    for i, y in enumerate(train_loader):
        y_gpu = model.parse_batch(y) # move batch to GPU
        
        y_pred_gpu = force(model, valid_kwargs=model_args, **{**y_gpu, "teacher_force_till": 0, "p_teacher_forcing": 1.0, "drop_frame_rate": 0.0})
        y_pred = {k: v.cpu() for k,v in y_pred_gpu.items() if v is not None}# move model outputs to CPU
        if args.fp16_save:
            y_pred = {k: v.half() for k,v in y_pred.items()}# convert model outputs to fp16
        
        if args.save_letter_alignments or args.save_phone_alignments:
            alignments = get_alignments(y_pred['alignments'], y['mel_lengths'], y['text_lengths'])# [B, mel_T, txt_T] -> [[B, mel_T, txt_T], [B, mel_T, txt_T], ...]
        
        offset_append = '' if audio_offset == 0 else str(audio_offset)
        for j in range(len(y['gt_mel'])):
            gt_mel   = y['gt_mel'  ][j, :, :y['mel_lengths'][j]]
            pred_mel = y_pred['pred_mel_postnet'][j, :, :y['mel_lengths'][j]]
            
            audiopath      = y['audiopath'][j]
            speaker_id_ext = y['speaker_id_ext'][j]
            
            # the losses are always computed so they can be logged; the
            # max_mse / max_mae thresholds are only enforced when set
            MAE = F.l1_loss(pred_mel, gt_mel).item()
            MSE = F.mse_loss(pred_mel, gt_mel).item()
            if args.max_mse and MSE > args.max_mse:
                print(f"MSE ({MSE}) is greater than max MSE ({args.max_mse}).\nFilepath: '{audiopath}'\n")
                failed_files += 1; continue
            if args.max_mae and MAE > args.max_mae:
                print(f"MAE ({MAE}) is greater than max MAE ({args.max_mae}).\nFilepath: '{audiopath}'\n")
                failed_files += 1; continue
            
            print(f"PATH: '{audiopath}'\nMel Shape:{list(gt_mel.shape)}\nSpeaker_ID: {speaker_id_ext}\nMSE: {MSE}\nMAE: {MAE}")
            if not args.do_not_save_mel:
                pred_mel_path = os.path.splitext(audiopath)[0]+'.pred_mel.pt'
                torch.save(pred_mel.clone(), pred_mel_path)
                pm_audio_path = os.path.splitext(audiopath)[0]+'.pm_audio.pt'# predicted mel audio
                torch.save(y['gt_audio'][j, :y['audio_lengths'][j]].clone(), pm_audio_path)
            if args.save_letter_alignments and hparams.p_arpabet == 0.:
                save_path_align_out = os.path.splitext(audiopath)[0]+'_galign.pt'
                torch.save(alignments[j].clone(), save_path_align_out)  # .pt tensors are saved with torch.save
            if args.save_phone_alignments and hparams.p_arpabet == 1.:
                save_path_align_out = os.path.splitext(audiopath)[0]+'_palign.pt'
                torch.save(alignments[j].clone(), save_path_align_out)
            map_line = f"{audiopath}|{y['gtext_str'][j]}|{speaker_id_ext}|\n"
            
            f.write(map_line)  # write paths to text file
            processed_files+=1
            print("")
        
        duration = time.time() - duration
        avg_duration = rolling_sum.process(duration)
        time_left = round(((total-i) * avg_duration)/3600, 2)
        print(f'{extra_info}{i}/{total}: computed and saved GTA mel-spectrograms for batch {i} in {duration:.1f}s, {time_left}hrs left. {processed_files} processed, {failed_files} failed.')
        duration = time.time()
    f.close()
    
    if n_gpus > 1:
        torch.distributed.barrier()  # wait until all GPUs reach this point
    
    # merge all generated filelists from every GPU
    filenames = [f'map_{filelisttype}_gpu{j}.txt' for j in range(n_gpus)]
    if rank == 0:
        with open(os.path.join(args.output_directory, f'map_{filelisttype}.txt'), 'w') as outfile:
            for fname in filenames:
                with open(os.path.join(args.output_directory, fname)) as infile:
                    for line in infile:
                        if len(line.strip()):
                            outfile.write(line)
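StreamingMovingAverage is not defined in this excerpt; a minimal sketch consistent with how it is used above (a fixed-size window whose process(value) returns the current average):

from collections import deque

class StreamingMovingAverage:
    """Average of the most recent `window_size` values."""
    def __init__(self, window_size):
        self.values = deque(maxlen=window_size)

    def process(self, value):
        self.values.append(value)
        return sum(self.values) / len(self.values)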
Example 7
if __name__ == '__main__':
    N = 68669  # hard coded because loading the cache is a pain
    cv = model_selection.KFold(n_splits=10)
    cache_path = '~/ct/ct-nlp-data/NLPPipeline_dsk_cache.pbz'
    jobs = ((cache_path, i_split, trn, tst)
            for i_split, (trn, tst) in enumerate(cv.split(list(range(N)))))
    parallel = joblib.Parallel(n_jobs=2)
    data_files = parallel(
        joblib.delayed(generate_split_data)(*job) for job in jobs)

    for i_split, datafile in enumerate(data_files):
        data = torch.load(datafile)
        args = train.parser.parse_args()

        trn_data, val_data =\
            train.prepare_dataloaders(data, batch_size=args.batch_size)
        src_vocab_size = trn_data.dataset.src_vocab_size
        tgt_vocab_size = trn_data.dataset.tgt_vocab_size

        DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        mdl = Transformer(
            src_vocab_size,
            tgt_vocab_size,
            # include <BOS> and <EOS>
            data['settings'].get('max_word_seq_len', 200) + 2,
            tgt_emb_prj_weight_sharing=args.proj_share_weight,
            emb_src_tgt_weight_sharing=args.embs_share_weight,
            d_k=args.d_k,
            d_v=args.d_v,
            d_model=args.d_model,
            d_word_vec=args.d_word_vec,