def phase_4_train_pvocoder(params):
    from io_modules.dataset import Dataset
    from models.vocoder import Vocoder
    from models.vocoder import ParallelVocoder
    from trainers.vocoder import Trainer

    # The parallel vocoder is trained against a frozen, pre-trained WaveNet
    # vocoder (teacher-student setup).
    vocoder_wavenet = Vocoder(params)
    sys.stdout.write('Loading wavenet vocoder\n')
    vocoder_wavenet.load('data/models/nn_vocoder')
    vocoder = ParallelVocoder(params, vocoder_wavenet)
    if params.resume:
        sys.stdout.write('Resuming from previous checkpoint\n')
        vocoder.load('data/models/pnn_vocoder')

    trainset = Dataset("data/processed/train")
    devset = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(trainset.files)) + ' training files and ' +
                     str(len(devset.files)) + ' development files\n')
    trainer = Trainer(vocoder, trainset, devset,
                      target_output_path='data/models/pnn_vocoder')
    trainer.start_training(20, params.batch_size, params.target_sample_rate, params=params)
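
# --- Illustrative sketch (not part of the original project) ---
# phase_4_train_pvocoder above follows a teacher-student pattern: a frozen,
# pre-trained WaveNet vocoder guides the training of a faster parallel
# vocoder. The toy code below shows only that wiring; ToyTeacher, ToyStudent
# and the squared-error objective are assumptions made for illustration, not
# the project's actual models or loss.
class ToyTeacher:
    def predict(self, x):
        # Frozen reference model: its parameters are never updated here.
        return [2.0 * v for v in x]


class ToyStudent:
    def __init__(self):
        self.w = 0.0

    def predict(self, x):
        return [self.w * v for v in x]

    def distill_step(self, x, target, lr=0.01):
        # One gradient step on the squared error between student and teacher:
        # d/dw (w*v - t)^2 = 2 * (w*v - t) * v
        for v, t in zip(x, target):
            self.w -= lr * 2.0 * (self.w * v - t) * v


def toy_distill(teacher, student, batches, epochs=10):
    # The student only ever sees the teacher's outputs as targets.
    for _ in range(epochs):
        for x in batches:
            student.distill_step(x, teacher.predict(x))
    return student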

def phase_3_train_encoder(params):
    from io_modules.dataset import Dataset
    from io_modules.dataset import Encodings
    from io_modules.dataset import DatasetIO
    from models.encoder import Encoder
    from trainers.encoder import Trainer

    trainset = Dataset("data/processed/train")
    devset = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(trainset.files)) + ' training files and ' +
                     str(len(devset.files)) + ' development files\n')

    encodings = Encodings()
    count = 0
    if not params.resume:
        # Build the symbol, feature and speaker inventories in one pass over
        # the training labels.
        for train_file in trainset.files:
            count += 1
            if count % 100 == 0:
                sys.stdout.write('\r' + str(count) + '/' + str(len(trainset.files)) + ' processed files')
                sys.stdout.flush()
            dio = DatasetIO()
            lab_list = dio.read_lab(train_file + ".lab")
            for entry in lab_list:
                encodings.update(entry)
        sys.stdout.write('\r' + str(count) + '/' + str(len(trainset.files)) + ' processed files\n')
        sys.stdout.write('Found ' + str(len(encodings.char2int)) + ' unique symbols, ' +
                         str(len(encodings.context2int)) + ' unique features and ' +
                         str(len(encodings.speaker2int)) + ' unique speakers\n')
        encodings.store('data/models/encoder.encodings')
    else:
        encodings.load('data/models/encoder.encodings')

    # When resuming, skip the orthonormal initialization: the weights are
    # restored from the checkpoint anyway.
    runtime = params.resume
    encoder = Encoder(params, encodings, runtime=runtime)
    if params.resume:
        sys.stdout.write('Resuming from previous checkpoint\n')
        encoder.load('data/models/rnn_encoder')
    if params.no_guided_attention:
        sys.stdout.write('Disabling guided attention\n')
    if params.no_bounds:
        sys.stdout.write('Using internal stopping condition for synthesis\n')
    trainer = Trainer(encoder, trainset, devset)
    trainer.start_training(10, 1000, params)
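
# --- Illustrative sketch (not part of the original project) ---
# The Encodings object used above is essentially a set of growing
# string-to-index inventories built in a single pass over the training
# labels. The class below is a simplified re-implementation of that idea;
# the attribute names beyond char2int/speaker2int and the on-disk format of
# the real io_modules.dataset.Encodings are assumptions and may differ.
class ToyEncodings:
    def __init__(self):
        self.char2int = {}
        self.speaker2int = {}

    def update(self, symbol, speaker):
        # Assign the next free index to anything seen for the first time.
        if symbol not in self.char2int:
            self.char2int[symbol] = len(self.char2int)
        if speaker not in self.speaker2int:
            self.speaker2int[speaker] = len(self.speaker2int)

    def store(self, path):
        # Tab-separated symbol/index pairs, ordered by index.
        with open(path, 'w', encoding='utf-8') as f:
            for sym, idx in sorted(self.char2int.items(), key=lambda kv: kv[1]):
                f.write(sym + '\t' + str(idx) + '\n')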

def phase_5_test_vocoder(params):
    from io_modules.dataset import Dataset
    from models.vocoder import Vocoder
    from trainers.vocoder import Trainer

    vocoder = Vocoder(params, runtime=True)
    vocoder.load('data/models/rnn')
    trainset = Dataset("data/processed/train")
    devset = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(trainset.files)) + ' training files and ' +
                     str(len(devset.files)) + ' development files\n')
    trainer = Trainer(vocoder, trainset, devset)
    trainer.synth_devset(params.batch_size, target_sample_rate=params.target_sample_rate,
                         sample=True, temperature=0.8)
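
# --- Illustrative sketch (not part of the original project) ---
# synth_devset above samples with temperature=0.8. Temperature sampling
# rescales the model's logits before drawing from the softmax: values below
# 1.0 sharpen the distribution (more conservative output), values above 1.0
# flatten it. The helper below shows the standard technique; it is a generic
# sketch, not the project's actual sampling code.
import math
import random


def sample_with_temperature(logits, temperature=0.8):
    # Softmax over temperature-scaled logits (max subtracted for numerical
    # stability).
    scaled = [l / temperature for l in logits]
    m = max(scaled)
    exps = [math.exp(s - m) for s in scaled]
    total = sum(exps)
    probs = [e / total for e in exps]
    # Draw one index from the resulting categorical distribution.
    r = random.random()
    acc = 0.0
    for i, p in enumerate(probs):
        acc += p
        if r < acc:
            return i
    return len(probs) - 1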

def phase_2_train_vocoder(params):
    from io_modules.dataset import Dataset
    from models.vocoder import Vocoder
    from trainers.vocoder import Trainer

    vocoder = Vocoder(params)
    if params.resume:
        sys.stdout.write('Resuming from previous checkpoint\n')
        vocoder.load('data/models/rnn')
    trainset = Dataset("data/processed/train")
    devset = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(trainset.files)) + ' training files and ' +
                     str(len(devset.files)) + ' development files\n')
    trainer = Trainer(vocoder, trainset, devset)
    trainer.start_training(20, params.batch_size, params.target_sample_rate)
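
# --- Illustrative sketch (not part of the original project) ---
# WaveNet-style vocoders such as the one trained in phase_2_train_vocoder
# typically model 8-bit mu-law-quantized audio rather than raw 16-bit
# samples; whether this project does the same is an assumption. The helpers
# below show the standard mu-law companding used for that purpose.
def mu_law_encode(sample, mu=255):
    # sample in [-1, 1] -> integer class in [0, mu]
    magnitude = math.log1p(mu * abs(sample)) / math.log1p(mu)
    signal = math.copysign(magnitude, sample)
    return int((signal + 1.0) / 2.0 * mu + 0.5)


def mu_law_decode(code, mu=255):
    # integer class in [0, mu] -> approximate sample in [-1, 1]
    signal = 2.0 * (code / float(mu)) - 1.0
    return math.copysign((1.0 / mu) * ((1.0 + mu) ** abs(signal) - 1.0), signal)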

def phase_7_train_sparse(params):
    sys.stdout.write("Starting sparsification for VanillaLSTM\n")
    from io_modules.dataset import Dataset
    from models.vocoder import Vocoder
    from trainers.vocoder import Trainer

    # Sparse training always resumes from an existing checkpoint; there is no
    # from-scratch path in this phase.
    vocoder = Vocoder(params, use_sparse_lstm=True)
    sys.stdout.write('Resuming from previous checkpoint\n')
    vocoder.load('data/models/rnn_vocoder_sparse')

    sys.stdout.write("Reading datasets\n")
    sys.stdout.flush()
    trainset = Dataset("data/processed/train")
    devset = Dataset("data/processed/dev")
    sys.stdout.write('Found ' + str(len(trainset.files)) + ' training files and ' +
                     str(len(devset.files)) + ' development files\n')
    sys.stdout.flush()
    trainer = Trainer(vocoder, trainset, devset)
    trainer.start_training(20, params.batch_size, params.target_sample_rate, params=params)
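
# --- Illustrative sketch (not part of the original project) ---
# phase_7_train_sparse continues training with a sparse LSTM. A common way to
# obtain such sparsity is iterative magnitude pruning: periodically zero out
# the smallest-magnitude weights and keep training the survivors. The helper
# below shows that generic technique on a plain weight matrix; the actual
# sparsification schedule inside the project's VanillaLSTM is not shown here
# and may differ.
def magnitude_prune(weights, sparsity=0.9):
    # weights: list of rows (lists of floats). Returns a pruned copy in which
    # roughly the `sparsity` fraction of smallest-magnitude entries is zeroed.
    flat = sorted(abs(w) for row in weights for w in row)
    k = int(len(flat) * sparsity)
    threshold = flat[k - 1] if k > 0 else -1.0
    return [[0.0 if abs(w) <= threshold else w for w in row] for row in weights]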