def start_training(self, itt_no_improve, batch_size, target_sample_rate, params=None):
    # Trains directly on the floating-point waveform (no mu-law / 16-bit discretization)
    # and checkpoints to self.target_output_path every 5000 files.
    import time
    from random import shuffle

    epoch = 1
    left_itt = itt_no_improve
    dio = DatasetIO()
    self._render_devset()
    sys.stdout.write("\n")
    # self.synth_devset(batch_size, target_sample_rate)
    self.vocoder.store(self.target_output_path)
    num_files = 0
    # NOTE: left_itt is never decremented below, so training runs until interrupted.
    while left_itt > 0:
        sys.stdout.write("Starting epoch " + str(epoch) + "\n")
        sys.stdout.write("Shuffling training data\n")
        shuffle(self.trainset.files)
        file_index = 1
        total_loss = 0
        for file in self.trainset.files:
            num_files += 1
            sys.stdout.write("\t" + str(file_index) + "/" + str(len(self.trainset.files)) +
                             " processing file " + file + '\n')
            sys.stdout.flush()
            wav_file = file + ".orig.wav"
            mgc_file = file + ".mgc.npy"
            mgc = np.load(mgc_file)
            file_index += 1
            data, sample_rate = dio.read_wave(wav_file)
            # wave_disc = data * 32768
            wave_disc = np.array(data, dtype=np.float32)
            start = time.time()
            loss = self.vocoder.learn(wave_disc, mgc, batch_size)
            total_loss += loss
            stop = time.time()
            sys.stdout.write(' avg loss=' + str(loss) + " execution time=" + str(stop - start))
            sys.stdout.write('\n')
            sys.stdout.flush()
            if file_index % 5000 == 0:
                self.vocoder.store(self.target_output_path)
                self.synth_devset(batch_size, target_sample_rate)
        self.vocoder.store(self.target_output_path)
        self.synth_devset(batch_size, target_sample_rate)
        epoch += 1
def start_training(self, itt_no_improve, batch_size, target_sample_rate):
    import time
    from random import shuffle

    epoch = 1
    left_itt = itt_no_improve
    dio = DatasetIO()
    self._render_devset()
    sys.stdout.write("\n")
    self.vocoder.store('data/models/rnn_vocoder')
    while left_itt > 0:
        sys.stdout.write("Starting epoch " + str(epoch) + "\n")
        sys.stdout.write("Shuffling training data\n")
        shuffle(self.trainset.files)
        file_index = 1
        total_loss = 0
        for file in self.trainset.files:
            sys.stdout.write("\t" + str(file_index) + "/" + str(len(self.trainset.files)) +
                             " processing file " + file)
            sys.stdout.flush()
            wav_file = file + ".orig.wav"
            mgc_file = file + ".mgc.npy"
            mgc = np.load(mgc_file)
            file_index += 1
            data, sample_rate = dio.read_wave(wav_file)
            if self.use_ulaw:
                [wave_disc, ulaw_cont] = dio.ulaw_encode(data)
            else:
                wave_disc = dio.b16_enc(data)
            start = time.time()
            loss = self.vocoder.learn(wave_disc, mgc, batch_size)
            total_loss += loss
            stop = time.time()
            sys.stdout.write(' avg loss=' + str(loss) + " execution time=" + str(stop - start))
            sys.stdout.write('\n')
            sys.stdout.flush()
            if file_index % 50 == 0:
                self.synth_devset(batch_size, target_sample_rate)
                self.vocoder.store('data/models/rnn_vocoder')
        self.synth_devset(batch_size, target_sample_rate)
        self.vocoder.store('data/models/rnn_vocoder')
        epoch += 1
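# A minimal sketch, assuming DatasetIO.ulaw_encode applies standard 8-bit mu-law
# companding and DatasetIO.b16_enc maps float samples to signed 16-bit integers;
# the real implementations live in io_modules.dataset and may differ. The helper
# names mu_law_encode and pcm16_encode below are illustrative, not repository API.
import numpy as np

def mu_law_encode(data, mu=255):
    # Compand float samples in [-1, 1] with the mu-law curve, then map to [0, mu].
    data = np.clip(np.asarray(data, dtype=np.float64), -1.0, 1.0)
    companded = np.sign(data) * np.log1p(mu * np.abs(data)) / np.log1p(mu)
    return ((companded + 1.0) / 2.0 * mu + 0.5).astype(np.int32)

def pcm16_encode(data):
    # Scale float samples in [-1, 1] to signed 16-bit integers.
    data = np.clip(np.asarray(data, dtype=np.float64), -1.0, 1.0)
    return (data * 32767).astype(np.int16)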
def phase_1_prepare_corpus(params):
    from os import listdir
    from os.path import isfile, join
    from os.path import exists

    train_files_tmp = [f for f in listdir(params.train_folder)
                       if isfile(join(params.train_folder, f))]
    dev_files_tmp = [f for f in listdir(params.dev_folder)
                     if isfile(join(params.dev_folder, f))]

    sys.stdout.write("Scanning training files...")
    sys.stdout.flush()
    final_list = []
    for file in train_files_tmp:
        base_name = file[:-4]
        lab_name = base_name + '.txt'  # transcript; a file is valid only if .txt and .wav both exist
        wav_name = base_name + '.wav'
        if exists(join(params.train_folder, lab_name)) and exists(join(params.train_folder, wav_name)):
            if base_name not in final_list:
                final_list.append(base_name)
    train_files = final_list
    sys.stdout.write(" found " + str(len(train_files)) + " valid training files\n")

    sys.stdout.write("Scanning development files...")
    sys.stdout.flush()
    final_list = []
    for file in dev_files_tmp:
        base_name = file[:-4]
        lab_name = base_name + '.txt'
        wav_name = base_name + '.wav'
        if exists(join(params.dev_folder, lab_name)) and exists(join(params.dev_folder, wav_name)):
            if base_name not in final_list:
                final_list.append(base_name)
    dev_files = final_list
    sys.stdout.write(" found " + str(len(dev_files)) + " valid development files\n")

    from io_modules.dataset import DatasetIO
    from io_modules.vocoder import MelVocoder
    from shutil import copyfile
    import pysptk
    dio = DatasetIO()
    vocoder = MelVocoder()

    base_folder = params.train_folder
    for index in range(len(train_files)):
        sys.stdout.write("\r\tprocessing file " + str(index + 1) + "/" + str(len(train_files)))
        sys.stdout.flush()
        base_name = train_files[index]
        txt_name = base_name + '.txt'
        wav_name = base_name + '.wav'
        spc_name = base_name + '.png'
        lab_name = base_name + '.lab'
        # LAB - copy or create
        if exists(join(base_folder, lab_name)):
            copyfile(join(base_folder, lab_name), join('data/processed/train', lab_name))
        else:
            create_lab_file(join(base_folder, txt_name), join('data/processed/train', lab_name))
        # TXT
        copyfile(join(base_folder, txt_name), join('data/processed/train', txt_name))
        # WAVE
        data, sample_rate = dio.read_wave(join(base_folder, wav_name),
                                          sample_rate=params.target_sample_rate)
        mgc = vocoder.melspectrogram(data,
                                     sample_rate=params.target_sample_rate,
                                     num_mels=params.mgc_order)
        # SPECT
        render_spectrogram(mgc, join('data/processed/train', spc_name))
        dio.write_wave(join('data/processed/train', base_name + '.orig.wav'), data, sample_rate)
        array2file(mgc, join('data/processed/train', base_name + '.mgc'))
    sys.stdout.write('\n')

    base_folder = params.dev_folder
    for index in range(len(dev_files)):
        sys.stdout.write("\r\tprocessing file " + str(index + 1) + "/" + str(len(dev_files)))
        sys.stdout.flush()
        base_name = dev_files[index]
        txt_name = base_name + '.txt'
        wav_name = base_name + '.wav'
        spc_name = base_name + '.png'
        lab_name = base_name + '.lab'
        # LAB - copy or create
        if exists(join(base_folder, lab_name)):
            copyfile(join(base_folder, lab_name), join('data/processed/dev', lab_name))
        else:
            create_lab_file(join(base_folder, txt_name), join('data/processed/dev', lab_name))
        # TXT
        copyfile(join(base_folder, txt_name), join('data/processed/dev', txt_name))
        # WAVE
        data, sample_rate = dio.read_wave(join(base_folder, wav_name),
                                          sample_rate=params.target_sample_rate)
        mgc = vocoder.melspectrogram(data,
                                     sample_rate=params.target_sample_rate,
                                     num_mels=params.mgc_order)
        # SPECT
        render_spectrogram(mgc, join('data/processed/dev', spc_name))
        dio.write_wave(join('data/processed/dev', base_name + '.orig.wav'), data, sample_rate)
        array2file(mgc, join('data/processed/dev', base_name + '.mgc'))
    sys.stdout.write('\n')
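# Illustrative only: phase_1_prepare_corpus above expects a params object exposing
# at least the attributes it reads. The values below are placeholders, not defaults
# taken from the repository.
from types import SimpleNamespace

params = SimpleNamespace(
    train_folder='corpus/train',    # folder holding matching .txt/.wav pairs
    dev_folder='corpus/dev',        # folder holding matching .txt/.wav pairs
    target_sample_rate=16000,       # resampling rate passed to dio.read_wave
    mgc_order=80,                   # number of mel bands for the spectrogram
)

phase_1_prepare_corpus(params)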
def start_training(self, itt_no_improve, batch_size, target_sample_rate, params=None):
    # Variant with weight pruning: sparsity starts at params.sparsity_step percent and is
    # raised by the same step every params.sparsity_increase files, capped at
    # params.sparsity_target percent.
    import time
    from random import shuffle

    epoch = 1
    left_itt = itt_no_improve
    dio = DatasetIO()
    self._render_devset()
    sys.stdout.write("\n")
    if self.vocoder.sparse:
        print("Setting sparsity at: " + str(params.sparsity_step) + "%")
        sparsity = params.sparsity_step
        self.vocoder.rnnFine.set_sparsity(float(sparsity) / 100)
        self.vocoder.rnnCoarse.set_sparsity(float(sparsity) / 100)
    if self.vocoder.sparse:
        self.vocoder.store('data/models/rnn_vocoder_sparse')
    else:
        self.vocoder.store('data/models/rnn_vocoder')
    num_files = 0
    while left_itt > 0:
        sys.stdout.write("Starting epoch " + str(epoch) + "\n")
        sys.stdout.write("Shuffling training data\n")
        shuffle(self.trainset.files)
        file_index = 1
        total_loss = 0
        for file in self.trainset.files:
            num_files += 1
            # Only meaningful for sparse vocoders; the guard also avoids a NameError on
            # `sparsity` when the vocoder is dense.
            if self.vocoder.sparse and num_files == params.sparsity_increase:
                sparsity += params.sparsity_step
                num_files = 0
                if sparsity <= params.sparsity_target:
                    print("Setting sparsity at " + str(sparsity) + "%")
                    self.vocoder.rnnFine.set_sparsity(float(sparsity) / 100)
                    self.vocoder.rnnCoarse.set_sparsity(float(sparsity) / 100)
                else:
                    sparsity = params.sparsity_target
            sys.stdout.write("\t" + str(file_index) + "/" + str(len(self.trainset.files)) +
                             " processing file " + file)
            sys.stdout.flush()
            wav_file = file + ".orig.wav"
            mgc_file = file + ".mgc.npy"
            mgc = np.load(mgc_file)
            file_index += 1
            data, sample_rate = dio.read_wave(wav_file)
            if self.use_ulaw:
                [wave_disc, ulaw_cont] = dio.ulaw_encode(data)
            else:
                wave_disc = dio.b16_enc(data)
            start = time.time()
            loss = self.vocoder.learn(wave_disc, mgc, batch_size)
            total_loss += loss
            stop = time.time()
            sys.stdout.write(' avg loss=' + str(loss) + " execution time=" + str(stop - start))
            sys.stdout.write('\n')
            sys.stdout.flush()
            if file_index % 50 == 0:
                self.synth_devset(batch_size, target_sample_rate)
                if self.vocoder.sparse:
                    self.vocoder.store('data/models/rnn_vocoder_sparse')
                else:
                    self.vocoder.store('data/models/rnn_vocoder')
        self.synth_devset(batch_size, target_sample_rate)
        if self.vocoder.sparse:
            self.vocoder.store('data/models/rnn_vocoder_sparse')
        else:
            self.vocoder.store('data/models/rnn_vocoder')
        epoch += 1
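# A small, hypothetical helper that restates the pruning schedule above as a pure
# function, for clarity only; the name sparsity_after is not part of the original
# code. Sparsity starts at sparsity_step percent and grows by the same step every
# sparsity_increase processed files, capped at sparsity_target.
def sparsity_after(num_files_seen, sparsity_step, sparsity_increase, sparsity_target):
    steps_taken = num_files_seen // sparsity_increase
    return min(sparsity_step * (1 + steps_taken), sparsity_target)

# Example: with sparsity_step=5, sparsity_increase=200 and sparsity_target=90,
# sparsity_after(1000, 5, 200, 90) returns 30 (percent).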
def phase_1_prepare_corpus(params):
    from os import listdir
    from os.path import isfile, join
    from os.path import exists

    train_files_tmp = [f for f in listdir(params.train_folder)
                       if isfile(join(params.train_folder, f))]
    if params.dev_folder is not None:
        dev_files_tmp = [f for f in listdir(params.dev_folder)
                         if isfile(join(params.dev_folder, f))]
    else:
        dev_files_tmp = []

    # Optional grapheme-to-phoneme model, used when .lab files have to be created.
    if params.g2p is not None:
        from models.g2p import G2P
        from io_modules.encodings import Encodings
        g2p_encodings = Encodings()
        g2p_encodings.load(params.g2p + '.encodings')
        g2p = G2P(g2p_encodings)
        g2p.load(params.g2p + '-bestAcc.network')
        if exists(params.g2p + '.lexicon'):
            g2p.load_lexicon(params.g2p + '.lexicon')
    else:
        g2p = None

    sys.stdout.write("Scanning training files...")
    sys.stdout.flush()
    final_list = []
    for file in train_files_tmp:
        base_name = file[:-4]
        lab_name = base_name + '.txt'  # transcript; a file is valid only if .txt and .wav both exist
        wav_name = base_name + '.wav'
        if exists(join(params.train_folder, lab_name)) and exists(join(params.train_folder, wav_name)):
            if base_name not in final_list:
                final_list.append(base_name)
    train_files = final_list
    sys.stdout.write(" found " + str(len(train_files)) + " valid training files\n")

    sys.stdout.write("Scanning development files...")
    sys.stdout.flush()
    final_list = []
    for file in dev_files_tmp:
        base_name = file[:-4]
        lab_name = base_name + '.txt'
        wav_name = base_name + '.wav'
        if exists(join(params.dev_folder, lab_name)) and exists(join(params.dev_folder, wav_name)):
            if base_name not in final_list:
                final_list.append(base_name)
    dev_files = final_list
    sys.stdout.write(" found " + str(len(dev_files)) + " valid development files\n")

    from io_modules.dataset import DatasetIO
    from io_modules.vocoder import MelVocoder
    from shutil import copyfile
    dio = DatasetIO()
    vocoder = MelVocoder()

    base_folder = params.train_folder
    total_files = 0
    for index in range(len(train_files)):
        total_files += 1
        sys.stdout.write("\r\tprocessing file " + str(index + 1) + "/" + str(len(train_files)))
        sys.stdout.flush()
        base_name = train_files[index]
        txt_name = base_name + '.txt'
        wav_name = base_name + '.wav'
        spc_name = base_name + '.png'
        lab_name = base_name + '.lab'
        tgt_txt_name = txt_name
        tgt_spc_name = spc_name
        tgt_lab_name = lab_name
        # When a prefix is given, outputs are renamed to <prefix>_<counter>.*
        if params.prefix is not None:
            tgt_txt_name = params.prefix + "_{:05d}".format(total_files) + '.txt'
            tgt_spc_name = params.prefix + "_{:05d}".format(total_files) + '.png'
            tgt_lab_name = params.prefix + "_{:05d}".format(total_files) + '.lab'
        # LAB - copy or create
        if exists(join(base_folder, lab_name)):
            copyfile(join(base_folder, lab_name), join('data/processed/train', tgt_lab_name))
        else:
            create_lab_file(join(base_folder, txt_name),
                            join('data/processed/train', tgt_lab_name),
                            speaker_name=params.speaker,
                            g2p=g2p)
        # TXT
        copyfile(join(base_folder, txt_name), join('data/processed/train', tgt_txt_name))
        # WAVE
        data, sample_rate = dio.read_wave(join(base_folder, wav_name),
                                          sample_rate=params.target_sample_rate)
        mgc = vocoder.melspectrogram(data,
                                     sample_rate=params.target_sample_rate,
                                     num_mels=params.mgc_order)
        # SPECT
        render_spectrogram(mgc, join('data/processed/train', tgt_spc_name))
        if params.prefix is None:
            dio.write_wave(join('data/processed/train', base_name + '.orig.wav'), data, sample_rate)
            array2file(mgc, join('data/processed/train', base_name + '.mgc'))
        else:
            tgt_wav_name = params.prefix + "_{:05d}".format(total_files) + '.orig.wav'
            tgt_mgc_name = params.prefix + "_{:05d}".format(total_files) + '.mgc'
            dio.write_wave(join('data/processed/train', tgt_wav_name), data, sample_rate)
            array2file(mgc, join('data/processed/train', tgt_mgc_name))
    sys.stdout.write('\n')

    base_folder = params.dev_folder
    for index in range(len(dev_files)):
        total_files += 1
        sys.stdout.write("\r\tprocessing file " + str(index + 1) + "/" + str(len(dev_files)))
        sys.stdout.flush()
        base_name = dev_files[index]
        txt_name = base_name + '.txt'
        wav_name = base_name + '.wav'
        spc_name = base_name + '.png'
        lab_name = base_name + '.lab'
        tgt_txt_name = txt_name
        tgt_spc_name = spc_name
        tgt_lab_name = lab_name
        if params.prefix is not None:
            tgt_txt_name = params.prefix + "_{:05d}".format(total_files) + '.txt'
            tgt_spc_name = params.prefix + "_{:05d}".format(total_files) + '.png'
            tgt_lab_name = params.prefix + "_{:05d}".format(total_files) + '.lab'
        # LAB - copy or create
        if exists(join(base_folder, lab_name)):
            copyfile(join(base_folder, lab_name), join('data/processed/dev', tgt_lab_name))
        else:
            create_lab_file(join(base_folder, txt_name),
                            join('data/processed/dev', tgt_lab_name),
                            speaker_name=params.speaker,
                            g2p=g2p)
        # TXT
        copyfile(join(base_folder, txt_name), join('data/processed/dev', tgt_txt_name))
        # WAVE
        data, sample_rate = dio.read_wave(join(base_folder, wav_name),
                                          sample_rate=params.target_sample_rate)
        mgc = vocoder.melspectrogram(data,
                                     sample_rate=params.target_sample_rate,
                                     num_mels=params.mgc_order)
        # SPECT
        render_spectrogram(mgc, join('data/processed/dev', tgt_spc_name))
        if params.prefix is None:
            dio.write_wave(join('data/processed/dev', base_name + '.orig.wav'), data, sample_rate)
            array2file(mgc, join('data/processed/dev', base_name + '.mgc'))
        else:
            tgt_wav_name = params.prefix + "_{:05d}".format(total_files) + '.orig.wav'
            tgt_mgc_name = params.prefix + "_{:05d}".format(total_files) + '.mgc'
            dio.write_wave(join('data/processed/dev', tgt_wav_name), data, sample_rate)
            array2file(mgc, join('data/processed/dev', tgt_mgc_name))
    sys.stdout.write('\n')
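# Illustrative only: the extended version above additionally reads prefix, speaker
# and g2p from params. All values below are placeholders.
from types import SimpleNamespace

params = SimpleNamespace(
    train_folder='corpus/train',
    dev_folder=None,                # may be None in this version; no dev files are produced
    target_sample_rate=16000,
    mgc_order=80,
    prefix='spk01',                 # renames outputs to spk01_00001.* and so on
    speaker='spk01',                # speaker name passed to create_lab_file
    g2p=None,                       # or a path prefix to a trained G2P model
)

phase_1_prepare_corpus(params)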