def main():
    """Load a trained Loop model from a checkpoint and print its eval loss.

    Reads the training arguments saved next to the checkpoint (args.pth,
    element 0 of the stored list) so the model definition matches the
    stored weights, then evaluates on the data set named in the CLI args.
    """
    args = init()
    checkpoint = args.checkpoint
    # Training args live alongside the checkpoint; load them once
    # (the original loaded args.pth twice).
    opt = torch.load(os.path.dirname(checkpoint) + '/args.pth')
    train_args = opt[0]
    train_args.noise = 0  # disable training-time noise for evaluation
    train_args.checkpoint = checkpoint
    # Use the *training* args to build the model so architecture matches.
    args_to_use = train_args
    print(args_to_use)
    model = Loop(args_to_use)
    model.cuda()
    # map_location keeps the load CPU-safe regardless of where it was saved.
    model.load_state_dict(
        torch.load(args_to_use.checkpoint,
                   map_location=lambda storage, loc: storage))
    criterion = MaskedMSE().cuda()
    loader = get_loader(args.data, args.max_seq_len, args.batch_size,
                        args.nspk)
    eval_loss = evaluate(model, loader, criterion)
    print(eval_loss)
def main():
    """Train a Loop model, checkpointing the best and latest weights.

    Resumes from args.checkpoint when given (recovering the epoch counter
    stored at index 3 of the saved args list). Saves bestmodel.pth when the
    validation loss improves, plus lastmodel.pth and args.pth every epoch.
    """
    start_epoch = 1
    model = Loop(args)
    model.cuda()
    if args.checkpoint != '':
        # Resume: recover the epoch counter saved with the checkpoint args.
        checkpoint_args_path = os.path.dirname(args.checkpoint) + '/args.pth'
        checkpoint_args = torch.load(checkpoint_args_path)
        start_epoch = checkpoint_args[3]
        # BUG FIX: load with map_location so resuming works on CPU-only
        # hosts (consistent with the other training entry point in this file).
        model.load_state_dict(
            torch.load(args.checkpoint,
                       map_location=lambda storage, loc: storage))
    criterion = MaskedMSE().cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')

    # Begin!
    for epoch in range(start_epoch, start_epoch + args.epochs):
        train(model, criterion, optimizer, epoch, train_losses)
        eval_loss = evaluate(model, criterion, epoch, eval_losses)
        if eval_loss < best_eval:
            # Best validation loss so far.
            torch.save(model.state_dict(),
                       '%s/bestmodel.pth' % (args.expName))
            best_eval = eval_loss
        # Running copy of the latest weights plus the loss history / epoch
        # counter needed to resume.
        torch.save(model.state_dict(), '%s/lastmodel.pth' % (args.expName))
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % (args.expName))
def eval_loss(checkpoint='models/vctk/bestmodel.pth', data='data/vctk',
              max_seq_len=1000, nspk=22, gpu=0, batch_size=64, seed=1):
    """Evaluate a saved Loop checkpoint on a data set.

    Builds the model from the training args stored next to the checkpoint,
    runs evaluate() over the loader, and returns
    (eval_loss, loss_workings) -- my_eval_loss is printed but not returned.
    """
    torch.cuda.set_device(gpu)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    print(checkpoint)
    print(os.getcwd())
    # Training args live alongside the checkpoint; load them once
    # (the original loaded args.pth twice).
    opt = torch.load(os.path.dirname(checkpoint) + '/args.pth')
    train_args = opt[0]
    train_args.noise = 0  # disable training-time noise for evaluation
    train_args.checkpoint = checkpoint
    args_to_use = train_args
    print(args_to_use)
    model = Loop(args_to_use)
    model.cuda()
    # map_location keeps the load CPU-safe regardless of where it was saved.
    model.load_state_dict(
        torch.load(args_to_use.checkpoint,
                   map_location=lambda storage, loc: storage))
    criterion = MaskedMSE().cuda()
    loader = get_loader(data, max_seq_len, batch_size, nspk)
    eval_loss, my_eval_loss, loss_workings = evaluate(model, loader, criterion)
    print(eval_loss)
    print(my_eval_loss)
    return eval_loss, loss_workings
def model_def(checkpoint, gpu=-1, valid_loader=None):
    """Build an eval-mode Loop model from a checkpoint.

    Args:
        checkpoint: path to saved state_dict; args.pth must sit beside it.
        gpu: move the model to CUDA when >= 0, else stay on CPU.
        valid_loader: loader whose first npz file supplies 'audio_norminfo'.
            Required -- the default None exists only for signature
            compatibility and is rejected explicitly.

    Returns:
        (model, norm) where norm is the audio normalisation info array.

    Raises:
        ValueError: if valid_loader is None.
    """
    weights = torch.load(checkpoint, map_location=lambda storage, loc: storage)
    opt = torch.load(os.path.dirname(checkpoint) + '/args.pth')
    train_args = opt[0]
    train_args.noise = 0  # disable training-time noise for inference
    # BUG FIX: the original dereferenced valid_loader unconditionally, so the
    # default None crashed with an opaque AttributeError. Fail with a clear
    # message instead.
    if valid_loader is None:
        raise ValueError('model_def requires a valid_loader to read '
                         'audio_norminfo from its first npz file')
    norm = np.load(valid_loader.dataset.npzs[0])['audio_norminfo']
    model = Loop(train_args)
    model.load_state_dict(weights)
    if gpu >= 0:
        model.cuda()
    model.eval()
    return model, norm
def main():
    """Generate a waveform from a pre-computed npz utterance and record it.

    Loads the Loop model described by the args saved next to args.checkpoint,
    synthesises features for args.npz with speaker args.spkr, writes the
    generated .wav under <checkpoint dir>/results, and appends a row
    (action/number/object/location slots parsed from the transcription) to
    the CSV at args.dataset_file.
    """
    weights = torch.load(args.checkpoint,
                         map_location=lambda storage, loc: storage)
    opt = torch.load(os.path.dirname(args.checkpoint) + '/args.pth')
    train_args = opt[0]
    # Fixed phoneme-to-code table matching the training data.
    char2code = {'aa': 0, 'ae': 1, 'ah': 2, 'ao': 3, 'aw': 4, 'ax': 5,
                 'ay': 6, 'b': 7, 'ch': 8, 'd': 9, 'dh': 10, 'eh': 11,
                 'er': 12, 'ey': 13, 'f': 14, 'g': 15, 'hh': 16, 'i': 17,
                 'ih': 18, 'iy': 19, 'jh': 20, 'k': 21, 'l': 22, 'm': 23,
                 'n': 24, 'ng': 25, 'ow': 26, 'oy': 27, 'p': 28, 'pau': 29,
                 'r': 30, 's': 31, 'sh': 32, 'ssil': 33, 't': 34, 'th': 35,
                 'uh': 36, 'uw': 37, 'v': 38, 'w': 39, 'y': 40, 'z': 41}
    nspkr = train_args.nspk
    # Locate the audio normalisation file: prefer the training-data copy,
    # fall back to one stored beside the checkpoint.
    norm_path = None
    if os.path.exists(train_args.data + '/norm_info/norm.dat'):
        norm_path = train_args.data + '/norm_info/norm.dat'
    elif os.path.exists(os.path.dirname(args.checkpoint) + '/norm.dat'):
        norm_path = os.path.dirname(args.checkpoint) + '/norm.dat'
    else:
        print('ERROR: Failed to find norm file.')
        return
    train_args.noise = 0  # no injected noise at generation time
    model = Loop(train_args)
    model.load_state_dict(weights)
    if args.gpu >= 0:
        model.cuda()
    model.eval()
    if args.spkr not in range(nspkr):
        print('ERROR: Unknown speaker id: %d.' % args.spkr)
        return
    txt, feat, spkr, output_fname = None, None, None, None
    # BUG FIX: compare strings with != instead of identity (`is not`), which
    # is unreliable for str values and a SyntaxWarning on Python 3.8+.
    if args.npz != '':
        txt, text, feat = npy_loader_phonemes(args.npz)
        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(torch.LongTensor([args.spkr]), volatile=True)
        fname = os.path.basename(args.npz)[:-4]
        output_fname = fname + '.gen_' + str(args.spkr)
        words = np.char.split(text).tolist()
        words = [word.encode('utf-8') for word in words]
        # Slot defaults for transcriptions that don't fill every field.
        action = 'none'
        number = 'none'
        objectt = 'none'  # spelled with tt because `object` is a builtin
        location = 'none'
        # Remove extra word for special cases
        if len(words) == 7:
            words = words[1:]
        action = words[0]
        if len(words) == 2:
            objectt = words[1]
        elif len(words) > 3:
            number = words[1]
            objectt = words[2]
            location = words[-1]
        # Read existing dataset rows (if any), keyed by path.
        frames = {}
        if os.path.exists(args.dataset_file):
            df = pd.read_csv(args.dataset_file)
            for row in zip(*[df[col].values.tolist()
                             for col in ['path', 'speakerId', 'transcription',
                                         'action', 'number', 'object',
                                         'location']]):
                frames[row[0]] = {'path': row[0], 'speakerId': row[1],
                                  'transcription': row[2], 'action': row[3],
                                  'number': row[4], 'object': row[5],
                                  'location': row[6]}
        # Add the newly generated utterance.
        path = os.path.join('wavs/synthetic',
                            output_fname.strip("/") + '.wav')
        frames[path] = {'path': path, 'speakerId': args.spkr,
                        'transcription': text, 'action': action,
                        'number': number, 'object': objectt,
                        'location': location}
        # Re-assemble column lists and rewrite the CSV.
        paths = []
        speakerIds = []
        transcriptions = []
        actions = []
        numbers = []
        objects = []
        locations = []
        for key, frame in frames.items():
            paths.append(frame['path'])
            speakerIds.append(frame['speakerId'])
            transcriptions.append(frame['transcription'])
            actions.append(frame['action'])
            numbers.append(frame['number'])
            objects.append(frame['object'])
            locations.append(frame['location'])
        df = pd.DataFrame(OrderedDict([('path', paths),
                                       ('speakerId', speakerIds),
                                       ('transcription', transcriptions),
                                       ('action', actions),
                                       ('number', numbers),
                                       ('object', objects),
                                       ('location', locations)]))
        df.to_csv(args.dataset_file)
    else:
        print('ERROR: Must supply npz file path or text as source.')
        return
    # Map phoneme codes back to symbols (debug aid; `phrase` is unused).
    key_list = list(char2code.keys())
    val_list = list(char2code.values())
    phrase = [key_list[val_list.index(letter)] for letter in txt.data.numpy()]
    if args.gpu >= 0:
        txt = txt.cuda()
        feat = feat.cuda()
        spkr = spkr.cuda()
    out, attn = model([txt, spkr], feat)
    out, attn = trim_pred(out, attn)
    output_dir = os.path.join(os.path.dirname(args.checkpoint), 'results')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    generate_merlin_wav(out.data.cpu().numpy(), output_dir, output_fname,
                        norm_path)
def generate_sample_with_loop(
        npz='', text='', spkr_id=1, gender=1,
        checkpoint='models/vctk-16khz-cmu-no-boundaries-all/bestmodel.pth',
        output_dir='./',
        npz_path='/home/ubuntu/loop/data/vctk-16khz-cmu-no-boundaries-all/numpy_features',
        output_file_override=None, ident_override=None):
    """Generate a .wav with a trained Loop model and return its workings.

    The source is either a pre-computed `npz` feature file (looked up under
    `npz_path`) or a raw `text` string converted via text2phone. Writes the
    generated wav (and, for npz input, the original features as a reference
    wav) into output_dir, and returns a dict containing the input phonemes,
    generated features, attention matrix, and output file stem.

    Raises:
        NotImplementedError: when ident_override is set (Loop_Ident variant
            is not available in this build).
        Exception: when neither npz nor text is supplied.
    """
    gender = np.array(gender).reshape(-1)
    out_dict = dict()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    gpu = 0
    # Load loop weights & training args from the checkpoint directory.
    weights = torch.load(checkpoint, map_location=lambda storage, loc: storage)
    opt = torch.load(os.path.dirname(checkpoint) + '/args.pth')
    train_args = opt[0]
    # NOTE(review): dataset and norm paths are hard-coded instead of derived
    # from train_args.data -- confirm they match the deployment layout.
    train_dataset = NpzFolder(
        '/home/ubuntu/loop/data/vctk-16khz-cmu-no-boundaries-all/numpy_features'
    )
    char2code = train_dataset.dict
    spkr2code = train_dataset.speakers
    norm_path = ('/home/ubuntu/loop/data/vctk-16khz-cmu-no-boundaries-all'
                 '/norm_info/norm.dat')
    train_args.noise = 0  # no injected noise at generation time
    valid_dataset_path = npz_path + '_valid'
    # Prepare loop model.
    if ident_override:
        # BUG FIX: the original fell through with `model` unbound (NameError);
        # fail loudly instead, since Loop_Ident is not available here.
        raise NotImplementedError(
            'ident_override requires Loop_Ident, which is not available')
    model = Loop_Base(train_args)
    model.load_state_dict(weights)
    if gpu >= 0:
        model.cuda()
    model.eval()
    # Check speaker id is valid. NOTE(review): original only warns and
    # continues -- behavior preserved; confirm whether this should return.
    if spkr_id not in range(len(spkr2code)):
        print('ERROR: Unknown speaker id: %d.' % spkr_id)
    # Get phone sequence.
    txt, feat, spkr, output_fname = None, None, None, None
    # BUG FIX: compare strings with != instead of identity (`is not`).
    if npz != '':
        # Use pre-calculated phonemes/features from the npz file.
        txt, feat, pre_calc_feat = npy_loader_phonemes(
            os.path.join(npz_path, npz))
        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(torch.LongTensor([spkr_id]), volatile=True)
        output_file = os.path.basename(npz)[:-4] + '_' + str(spkr_id)
        out_dict['pre_calc_feat'] = pre_calc_feat
    elif text != '':
        # Extract phonemes from the text string.
        txt = text2phone(text, char2code)
        # Generous output-length bound: 20 frames per phoneme, 63 dims.
        feat = torch.FloatTensor(txt.size(0) * 20, 63)
        spkr = torch.LongTensor([spkr_id])
        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(spkr, volatile=True)
        output_file = text.replace(' ', '_')
    else:
        print('ERROR: Must supply npz file path or text as source.')
        raise Exception('Need source')
    if output_file_override:
        output_file = output_file_override
    # Move inputs to the GPU.
    if gpu >= 0:
        txt = txt.cuda()
        feat = feat.cuda()
        spkr = spkr.cuda()
    # Run loop model to generate output features.
    loop_feat, attn = model([txt, spkr, gender], feat)
    loop_feat, attn = trim_pred(loop_feat, attn)
    # Add everything useful to the output dictionary.
    out_dict['txt'] = txt[:, 0].squeeze().data.tolist()
    out_dict['spkr'] = spkr
    out_dict['feat'] = feat.data.cpu().numpy()
    out_dict['loop_feat'] = loop_feat.data.cpu().numpy()
    out_dict['attn'] = attn.squeeze().data.cpu().numpy()
    out_dict['output_file'] = output_file
    out_dict['valid_dataset_path'] = valid_dataset_path
    # Generate .wav file from loop output features.
    generate_merlin_wav(loop_feat.data.cpu().numpy(), output_dir, output_file,
                        norm_path)
    # Generate .wav file from the original features for reference.
    if npz != '':
        output_orig_fname = os.path.basename(npz)[:-4] + '.orig'
        generate_merlin_wav(feat[:, 0, :].data.cpu().numpy(), output_dir,
                            output_orig_fname, norm_path)
        out_dict['output_orig_fname'] = output_orig_fname
    return out_dict
def main():
    """Synthesise a waveform from an npz utterance or a raw text string.

    Builds the Loop model from the args saved next to args.checkpoint.
    For npz input the pre-computed phonemes/features are used; for text
    input phonemes come from text2phone and the output-length buffer is a
    20-frames-per-phoneme bound. The generated wav (and, for npz input, a
    reference wav from the original features) is written under
    <checkpoint dir>/results.
    """
    weights = torch.load(args.checkpoint,
                         map_location=lambda storage, loc: storage)
    opt = torch.load(os.path.dirname(args.checkpoint) + '/args.pth')
    train_args = opt[0]
    # Fixed phoneme-to-code table matching the training data.
    char2code = {'aa': 0, 'ae': 1, 'ah': 2, 'ao': 3, 'aw': 4, 'ax': 5,
                 'ay': 6, 'b': 7, 'ch': 8, 'd': 9, 'dh': 10, 'eh': 11,
                 'er': 12, 'ey': 13, 'f': 14, 'g': 15, 'hh': 16, 'i': 17,
                 'ih': 18, 'iy': 19, 'jh': 20, 'k': 21, 'l': 22, 'm': 23,
                 'n': 24, 'ng': 25, 'ow': 26, 'oy': 27, 'p': 28, 'pau': 29,
                 'r': 30, 's': 31, 'sh': 32, 'ssil': 33, 't': 34, 'th': 35,
                 'uh': 36, 'uw': 37, 'v': 38, 'w': 39, 'y': 40, 'z': 41}
    nspkr = train_args.nspk
    # Locate the audio normalisation file: prefer the training-data copy,
    # fall back to one stored beside the checkpoint.
    norm_path = None
    if os.path.exists(train_args.data + '/norm_info/norm.dat'):
        norm_path = train_args.data + '/norm_info/norm.dat'
    elif os.path.exists(os.path.dirname(args.checkpoint) + '/norm.dat'):
        norm_path = os.path.dirname(args.checkpoint) + '/norm.dat'
    else:
        print('ERROR: Failed to find norm file.')
        return
    train_args.noise = 0  # no injected noise at generation time
    model = Loop(train_args)
    model.load_state_dict(weights)
    if args.gpu >= 0:
        model.cuda()
    model.eval()
    if args.spkr not in range(nspkr):
        print('ERROR: Unknown speaker id: %d.' % args.spkr)
        return
    txt, feat, spkr, output_fname = None, None, None, None
    # BUG FIX: compare strings with != instead of identity (`is not`), which
    # is unreliable for str values and a SyntaxWarning on Python 3.8+.
    if args.npz != '':
        txt, feat = npy_loader_phonemes(args.npz)
        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(torch.LongTensor([args.spkr]), volatile=True)
        fname = os.path.basename(args.npz)[:-4]
        output_fname = fname + '.gen_' + str(args.spkr)
    elif args.text != '':
        txt = text2phone(args.text, char2code)
        # Generous output-length bound: 20 frames per phoneme, 63 dims.
        feat = torch.FloatTensor(txt.size(0) * 20, 63)
        spkr = torch.LongTensor([args.spkr])
        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(spkr, volatile=True)
        # Slugify the input string into a safe file name.
        fname = args.text.replace(' ', '_')
        valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
        fname = ''.join(c for c in fname if c in valid_chars)
        output_fname = fname + '.gen_' + str(args.spkr)
    else:
        print('ERROR: Must supply npz file path or text as source.')
        return
    if args.gpu >= 0:
        txt = txt.cuda()
        feat = feat.cuda()
        spkr = spkr.cuda()
    out, attn = model([txt, spkr], feat)
    out, attn = trim_pred(out, attn)
    output_dir = os.path.join(os.path.dirname(args.checkpoint), 'results')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    generate_merlin_wav(out.data.cpu().numpy(), output_dir, output_fname,
                        norm_path)
    # For npz input, also render the original features for comparison.
    if args.npz != '':
        output_orig_fname = os.path.basename(args.npz)[:-4] + '.orig'
        generate_merlin_wav(feat[:, 0, :].data.cpu().numpy(), output_dir,
                            output_orig_fname, norm_path)
def main():
    """Synthesise a waveform from an npz utterance or a raw text string.

    Unlike the sibling generators in this file, the phoneme dictionary and
    speaker list are read from the training dataset (NpzFolder) rather than
    hard-coded, and the norm file is the .npy variant. Output wavs go under
    <checkpoint dir>/results.
    """
    weights = torch.load(args.checkpoint,
                         map_location=lambda storage, loc: storage)
    opt = torch.load(os.path.dirname(args.checkpoint) + '/args.pth')
    train_args = opt[0]
    train_dataset = NpzFolder(train_args.data + '/numpy_features')
    char2code = train_dataset.dict
    spkr2code = train_dataset.speakers
    norm_path = train_args.data + '/norm_info/norm.dat.npy'
    train_args.noise = 0  # no injected noise at generation time
    model = Loop(train_args)
    model.load_state_dict(weights)
    if args.gpu >= 0:
        model.cuda()
    model.eval()
    if args.spkr not in range(len(spkr2code)):
        print('ERROR: Unknown speaker id: %d.' % args.spkr)
        return
    txt, feat, spkr, output_fname = None, None, None, None
    # BUG FIX: compare strings with != instead of identity (`is not`), which
    # is unreliable for str values and a SyntaxWarning on Python 3.8+.
    if args.npz != '':
        txt, feat = npy_loader_phonemes(args.npz)
        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(torch.LongTensor([args.spkr]), volatile=True)
        fname = os.path.basename(args.npz)[:-4]
        output_fname = fname + '.gen_' + str(args.spkr)
    elif args.text != '':
        txt = text2phone(args.text, char2code)
        # Generous fixed output-length bound (frames x feature dims).
        feat = torch.FloatTensor(1500, 67)
        spkr = torch.LongTensor([args.spkr])
        txt = Variable(txt.unsqueeze(1), volatile=True)
        feat = Variable(feat.unsqueeze(1), volatile=True)
        spkr = Variable(spkr, volatile=True)
        fname = args.text.replace(' ', '_')
        output_fname = fname + '.gen_' + str(args.spkr)
    else:
        print('ERROR: Must supply npz file path or text as source.')
        return
    if args.gpu >= 0:
        txt = txt.cuda()
        feat = feat.cuda()
        spkr = spkr.cuda()
    out, attn = model([txt, spkr], feat)
    out, attn = trim_pred(out, attn)
    output_dir = os.path.join(os.path.dirname(args.checkpoint), 'results')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    generate_merlin_wav(out.data.cpu().numpy(), output_dir, output_fname,
                        norm_path)
    # For npz input, also render the original features for comparison.
    if args.npz != '':
        output_orig_fname = os.path.basename(args.npz)[:-4] + '.orig'
        generate_merlin_wav(feat[:, 0, :].data.cpu().numpy(), output_dir,
                            output_orig_fname, norm_path)
def main():
    """Train a Loop model, saving per-epoch checkpoints and logging losses.

    Every epoch saves epoch_<n>.pth (so evaluation metrics can be computed
    across the whole training curve later), plus lastmodel.pth and args.pth;
    bestmodel.pth is updated whenever the validation loss improves. Every
    args.eval_epochs epochs the model is also evaluated on a randomised
    subset of the training set, and all losses go to the TrainingMonitor.
    """
    start_epoch = 1
    model = Loop(args)
    model.cuda()
    if args.checkpoint != '':
        # Resume: recover the epoch counter saved with the checkpoint args.
        checkpoint_args_path = os.path.dirname(args.checkpoint) + '/args.pth'
        checkpoint_args = torch.load(checkpoint_args_path)
        start_epoch = checkpoint_args[3]
        model.load_state_dict(
            torch.load(args.checkpoint,
                       map_location=lambda storage, loc: storage))
    criterion = MaskedMSE().cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')
    training_monitor = TrainingMonitor(file=args.expNameRaw,
                                       exp_name=args.expNameRaw,
                                       b_append=True,
                                       path='training_logs')

    # Begin!
    for epoch in range(start_epoch, start_epoch + args.epochs):
        # train model
        train(model, criterion, optimizer, epoch, train_losses)
        # evaluate on validation set
        eval_loss = evaluate(model, criterion, epoch, eval_losses)

        # Save a checkpoint for every epoch so evaluation metrics can be
        # computed across the training curve later on.
        torch.save(model.state_dict(),
                   '%s/epoch_%d.pth' % (args.expName, epoch))
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % (args.expName))

        if eval_loss < best_eval:
            # Best validation loss so far.
            torch.save(model.state_dict(),
                       '%s/bestmodel.pth' % (args.expName))
            best_eval = eval_loss

        # Running copy of the latest weights.
        torch.save(model.state_dict(), '%s/lastmodel.pth' % (args.expName))
        torch.save([args, train_losses, eval_losses, epoch],
                   '%s/args.pth' % (args.expName))

        # evaluate on a randomised subset of the training set
        if epoch % args.eval_epochs == 0:
            train_eval_loader = ec.get_training_data_for_eval(
                data=args.data, len_valid=len(valid_loader.dataset))
            # BUG FIX: ('loss') is just the string 'loss', not a tuple --
            # a one-element tuple needs a trailing comma.
            train_loss, _, _, _ = ec.evaluate(model=model,
                                              criterion=criterion,
                                              epoch=epoch,
                                              loader=train_eval_loader,
                                              metrics=('loss',))
        else:
            train_loss = None

        # store loss metrics
        training_monitor.insert(epoch=epoch, valid_loss=eval_loss,
                                train_loss=train_loss)
        training_monitor.write()