def get_model(hps_path, model_path):
    hps = Hps()
    hps.load(hps_path)
    hps_tuple = hps.get_tuple()
    solver = Solver(hps_tuple, None, None)
    solver.load_model(model_path)
    return solver
def get_model(
        hps_path='./hps/vctk.json',
        model_path='/storage/model/voice_conversion/vctk/clf/model.pkl-109999'):
    hps = Hps()
    hps.load(hps_path)
    hps_tuple = hps.get_tuple()
    solver = Solver(hps_tuple, None)
    solver.load_model(model_path)
    return solver
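# Minimal usage sketch for get_model (an illustration, not part of the source): the wav
# paths, checkpoint path and target speaker id below are placeholders, and the helpers
# (get_spectrograms, spectrogram2wav, write) are the ones used by the conversion script below.
#
#   solver = get_model(hps_path='./hps/vctk.json', model_path='./pkl/model.pkl')
#   _, spec = get_spectrograms('./source.wav')
#   spec_tensor = torch.from_numpy(np.expand_dims(spec, axis=0)).type(torch.FloatTensor)
#   c = Variable(torch.from_numpy(np.array([2]))).cuda()   # target speaker index
#   result = solver.test_step(spec_tensor, c, gen=True)
#   wav_data = spectrogram2wav(result.squeeze(axis=0).transpose((1, 0)))
#   write('./converted.wav', rate=16000, data=wav_data)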
import argparse

import numpy as np
import torch
from torch.autograd import Variable
from scipy.io.wavfile import write

from utils import Hps
from solver import Solver
# get_spectrograms is assumed to live alongside spectrogram2wav in the tacotron utils
from preprocess.tacotron.utils import get_spectrograms, spectrogram2wav

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-hps', help='The path of the hyper-parameter set', default='vctk.json')
    parser.add_argument('-model', '-m', help='The path of the model checkpoint')
    parser.add_argument('-source', '-s', help='The path of the source .wav file')
    parser.add_argument(
        '-target', '-t',
        help='Target speaker id (integer). Same order as the speaker list used '
             'during preprocessing (en_speaker_used.txt)')
    parser.add_argument('-output', '-o', help='Output .wav path')
    parser.add_argument('-sample_rate', '-sr', default=16000, type=int)
    # note: with default=True this store_true flag is always on
    parser.add_argument('--use_gen', default=True, action='store_true')
    args = parser.parse_args()

    # build the solver from the hyper-parameters and load the checkpoint
    hps = Hps()
    hps.load(args.hps)
    hps_tuple = hps.get_tuple()
    solver = Solver(hps_tuple, None)
    solver.load_model(args.model)

    # source utterance -> spectrogram -> (1, T, F) float tensor
    _, spec = get_spectrograms(args.source)
    spec_expand = np.expand_dims(spec, axis=0)
    spec_tensor = torch.from_numpy(spec_expand).type(torch.FloatTensor)

    # target speaker id as a one-element tensor on the GPU
    c = Variable(torch.from_numpy(np.array([int(args.target)]))).cuda()

    # convert, then synthesize the waveform from the predicted spectrogram
    result = solver.test_step(spec_tensor, c, gen=args.use_gen)
    result = result.squeeze(axis=0).transpose((1, 0))
    wav_data = spectrogram2wav(result)
    write(args.output, rate=args.sample_rate, data=wav_data)
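# Example invocation (a sketch; the script name and file paths are placeholders, not from the source):
#   python convert.py -hps vctk.json -m ./pkl/model.pkl -s ./source.wav -t 2 -o ./converted.wav -sr 16000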
            # adversarial update for the autoencoder: minimize the reconstruction loss
            # while maximizing the (alpha-weighted) speaker-classifier loss
            loss = loss_rec - current_alpha * loss_clf
            reset_grad([self.Encoder, self.Decoder])
            loss.backward()
            grad_clip([self.Encoder, self.Decoder], self.hps.max_grad_norm)
            self.ae_opt.step()

            # logging
            info = {
                f'{flag}/loss_rec': loss_rec.item(),
                f'{flag}/G_loss_clf': loss_clf.item(),
                f'{flag}/alpha': current_alpha,
                f'{flag}/G_acc': acc,
            }
            slot_value = (iteration + 1, hps.iters) + tuple(info.values())
            log = 'G:[%06d/%06d], loss_rec=%.3f, loss_clf=%.2f, alpha=%.2e, acc=%.2f'
            print(log % slot_value)
            if iteration % 100 == 0:
                for tag, value in info.items():
                    self.logger.scalar_summary(tag, value, iteration + 1)
            if iteration % 1000 == 0 or iteration + 1 == hps.iters:
                self.save_model(model_path, iteration)


if __name__ == '__main__':
    hps = Hps()
    hps.load('./hps/v7.json')
    hps_tuple = hps.get_tuple()
    dataset = myDataset('/storage/raw_feature/voice_conversion/vctk/vctk.h5',
                        '/storage/raw_feature/voice_conversion/vctk/64_513_2000k.json')
    data_loader = DataLoader(dataset)
    solver = Solver(hps_tuple, data_loader)
import pickle

import numpy as np

from utils import Hps
from utils import DataLoader
from utils import Logger
from utils import myDataset
from utils import Indexer
from solver import Solver
from preprocess.tacotron.utils import spectrogram2wav
#from preprocess.tacotron.audio import inv_spectrogram, save_wav
from scipy.io.wavfile import write
from preprocess.tacotron.mcep import mc2wav

if __name__ == '__main__':
    feature = 'sp'
    hps = Hps()
    hps.load('./hps/v19.json')
    hps_tuple = hps.get_tuple()
    solver = Solver(hps_tuple, None)
    solver.load_model('/storage/model/voice_conversion/v19/model.pkl-59999')
    if feature == 'mc':
        # indexer to extract data
        indexer = Indexer()
        src_mc = indexer.index(speaker_id='225', utt_id='366', dset='test', feature='norm_mc')
        tar_mc = indexer.index(speaker_id='226', utt_id='366', dset='test', feature='norm_mc')
        expand_src_mc = np.expand_dims(src_mc, axis=0)
import argparse

from utils import Hps
from utils import DataLoader
from solver import Solver
# assumption: SingleDataset lives in utils alongside myDataset/DataLoader
from utils import SingleDataset

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # note: with default=True, --train is effectively always on
    parser.add_argument('--train', default=True, action='store_true')
    parser.add_argument('--test', default=False, action='store_true')
    parser.add_argument('--load_model', default=False, action='store_true')
    parser.add_argument('-flag', default='train')
    parser.add_argument('-hps_path', default='./hps/vctk.json')
    parser.add_argument('-load_model_path', default='./pkl/pretrain.pkl')
    parser.add_argument('-dataset_path', default='./vctk.h5')
    parser.add_argument('-index_path', default='./index.json')
    parser.add_argument('-output_model_path', default='./pkl/model.pkl')
    args = parser.parse_args()

    hps = Hps()
    hps.load(args.hps_path)
    hps_tuple = hps.get_tuple()
    dataset = SingleDataset(args.dataset_path, args.index_path, seg_len=hps_tuple.seg_len)
    data_loader = DataLoader(dataset)
    solver = Solver(hps_tuple, data_loader)
    if args.load_model:
        solver.load_model(args.load_model_path)
    if args.train:
        # training schedule: pretrain generator and discriminator, then full training and patchGAN
        solver.train(args.output_model_path, args.flag, mode='pretrain_G')
        solver.train(args.output_model_path, args.flag, mode='pretrain_D')
        solver.train(args.output_model_path, args.flag, mode='train')
        solver.train(args.output_model_path, args.flag, mode='patchGAN')
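# Example invocation (a sketch; the script name and file paths are placeholders, not from the source):
#   python main.py -hps_path ./hps/vctk.json -dataset_path ./vctk.h5 -index_path ./index.json \
#       -output_model_path ./pkl/model.pkl --load_model -load_model_path ./pkl/pretrain.pkl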
    def valid_step(self, batch_x, batch_y):
        # evaluate the log loss with dropout disabled (keep_prob = 1.0)
        loss = self.sess.run(
            self._log_loss,
            feed_dict={self.x: batch_x, self.y: batch_y, self.kp: 1.0}
        )
        return loss

    def train_step(self, batch_x, batch_y, coverage=False):
        if not coverage:
            # standard negative log-likelihood update
            _, loss = self.sess.run(
                [self._nll_opt, self._log_loss],
                feed_dict={self.x: batch_x, self.y: batch_y, self.kp: self._hps.keep_prob}
            )
        else:
            # update with the coverage objective
            _, loss = self.sess.run(
                [self._coverage_opt, self._coverage_loss],
                feed_dict={self.x: batch_x, self.y: batch_y, self.kp: self._hps.keep_prob}
            )
        return loss


if __name__ == '__main__':
    vocab = Vocab()
    hps = Hps()
    hps.load('./hps/cd_v3.json')
    hps_tuple = hps.get_tuple()
    model = PointerModel(hps_tuple, vocab)
    model.init()
    print('model build OK')
    model.tt()
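# A hedged sketch of how these steps could be driven from a training loop; get_batch(),
# n_iters and the point where training switches to the coverage objective are assumptions,
# not part of the source.
#
#   n_iters = 100000
#   for step in range(n_iters):
#       batch_x, batch_y = get_batch()
#       loss = model.train_step(batch_x, batch_y, coverage=(step >= n_iters // 2))
#       if step % 1000 == 0:
#           print('step %d, train loss %.3f, valid loss %.3f'
#                 % (step, loss, model.valid_step(batch_x, batch_y)))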