def track_top_models(self, mel_loss, gen_wav, model):
    """
    Maintain the ranked list of best models and keep the checkpoint
    directory in sync with it.

    The current ranking is printed first. If the list holds fewer than
    ``train_cfg['keep_top_k']`` entries, or ``mel_loss`` beats the worst
    ranked entry, the model is saved, the list is re-sorted by loss and
    truncated, stale ``pyt`` files are removed from ``paths.voc_top_k``,
    the ranking is pickled to ``top_k.pkl`` and the generated audio of
    every ranked model is written to the summary writer.
    """
    for rank, (loss, _, step, name) in enumerate(self.top_k_models):
        print(f'{rank} {loss} {step} {name}')

    limit = self.train_cfg['keep_top_k']
    has_room = len(self.top_k_models) < limit
    # Short-circuit keeps the worst-entry lookup from running on a list
    # that still has free slots (it may be empty).
    if not has_room and not (mel_loss < self.top_k_models[-1][0]):
        return

    m_step = model.get_step()
    model_name = f'model_loss{mel_loss:#0.5}_step{m_step}_weights.pyt'
    self.top_k_models.append((mel_loss, gen_wav, m_step, model_name))
    self.top_k_models.sort(key=lambda entry: entry[0])
    self.top_k_models = self.top_k_models[:limit]
    model.save(self.paths.voc_top_k / model_name)

    # Delete every checkpoint file that no longer belongs to the ranking.
    ranked_names = {entry[-1] for entry in self.top_k_models}
    for checkpoint in get_files(self.paths.voc_top_k, extension='pyt'):
        if checkpoint.name in ranked_names:
            continue
        print(f'removing {checkpoint}')
        os.remove(checkpoint)

    pickle_binary(self.top_k_models, self.paths.voc_top_k / 'top_k.pkl')

    for position, entry in enumerate(self.top_k_models, 1):
        _, ranked_wav, ranked_step, _ = entry
        self.writer.add_audio(tag=f'Top_K_Models/generated_top_{position}',
                              snd_tensor=ranked_wav,
                              global_step=ranked_step,
                              sample_rate=self.dsp.sample_rate)
def ljspeech(path: Union[str, Path]):
    """
    Build an id -> text mapping from the single ``.csv`` file under `path`.

    Each line is split on ``'|'``; the first field becomes the key and the
    last field (left exactly as read, including any line ending) becomes
    the value. Asserts that exactly one ``.csv`` file was found.
    """
    metadata_files = get_files(path, extension='.csv')
    assert len(metadata_files) == 1
    with open(str(metadata_files[0]), encoding='utf-8') as metadata:
        rows = (row.split('|') for row in metadata)
        # Later rows overwrite earlier ones that share the same id.
        return {fields[0]: fields[-1] for fields in rows}
def main(config: dict, data_set_name: str):
    """
    Run the staged extraction pipeline for one data set.

    Every stage result ('files', 'tables', 'data', and in development mode
    'raw_data' / 'type_info') is looked up in a ``DataStore`` first and only
    recomputed when the store does not have it. When ``config['special']``
    is truthy, ``config['report_function']`` is called with the final data.

    Args:
        config: Settings dict. Keys read here: 'data_path', 'file_config',
            'sheet_config', 'mappings', 'meta_range', 'special' and
            (when 'special' is truthy) 'report_function'.
        data_set_name: Name used for the store and passed to the report.

    Returns:
        The processed data, or ``False`` when saving the file list or the
        tables failed.
    """
    development_mode = False
    logger.info(config['data_path'])
    store = DataStore(path=config['data_path'],
                      store_type='csv',
                      store_name=data_set_name,
                      freshness=0)
    logger.debug('Start data extraction process...')

    if not store.check(key='data'):
        # Outside development mode the raw data is always rebuilt.
        if not development_mode or not store.check(key='raw_data'):
            if not store.check(key='tables'):
                if not store.check(key='files'):
                    logger.debug('Retrieving all files...')
                    files = get_files(**config['file_config'])
                    saved = store.save(files, key='files')
                    if not saved:
                        logger.warning('No files found. Check logs.')
                        return False
                else:
                    files = store.load(key='files')

                logger.debug('Retrieving all tables...')
                tables = get_tables(files, **config['sheet_config'])
                saved = store.save(tables, key='tables')
                if not saved:
                    logger.warning('No files could be opened. Check logs.')
                    return False
            else:
                tables = store.load(key='tables')

            logger.debug('Retrieving raw data...')
            raw_data = get_excel_data(tables,
                                      config['sheet_config'],
                                      config['mappings'],
                                      config['meta_range'])
            if development_mode:
                store.save(raw_data['values'], key='raw_data')
                store.save(raw_data['type_info'], key='type_info')
        else:
            raw_data = {
                'values': store.load(key='raw_data'),
                'type_info': store.load(key='type_info'),
            }

        logger.debug('Processing raw data...')
        data = process_data(raw_data['values'],
                            raw_data['type_info'],
                            config['mappings'])
        store.save(data, key='data')
    else:
        data = store.load(key='data')

    logger.debug('Done!')

    if config['special']:
        try:
            logger.info('Writing the Excel Report for %s...', data_set_name)
            config['report_function'](data, data_set_name)
        except Exception:
            # Report writing is best-effort: log the traceback and still
            # return the data. Only Exception is caught so that
            # KeyboardInterrupt / SystemExit keep propagating.
            logger.exception('Writing the Excel Report failed:')

    return data
def nick(path):
    """
    Read the single ``.csv`` file under `path` into an id -> text dict.

    Trailing whitespace is stripped from every line before it is split on
    ``'|'``; the first field is the key and the last field the value.
    Asserts that exactly one ``.csv`` file was found.
    """
    found = get_files(path, extension='.csv')
    assert len(found) == 1

    text_dict = {}
    with open(found[0], encoding='utf-8') as handle:
        for raw_line in handle:
            fields = raw_line.rstrip().split('|')
            key, value = fields[0], fields[-1]
            text_dict[key] = value
    return text_dict
def ljspeech(path: Union[str, Path]):
    """
    Map each id to the second-to-last ``'|'``-separated field of its line.

    Reads the single ``.csv`` file found under `path` (asserted), prints a
    banner followed by the resulting dict, and returns the dict.
    """
    candidates = get_files(path, extension='.csv')
    assert len(candidates) == 1

    with open(candidates[0], encoding='utf-8') as metadata:
        rows = [row.split('|') for row in metadata]

    text_dict = {}
    for fields in rows:
        text_dict[fields[0]] = fields[-2]

    rule = '-' * 20
    print(rule + 'Text dict' + rule)
    print(text_dict)
    return text_dict
def ljspeech(path, data_dir):
    """
    Return the ids of all metadata rows whose text column is non-empty.

    The single ``.csv`` file under `path` (asserted) is read line by line
    and split on ``'|'``. A row contributes its stripped first field when
    its last field is non-empty after stripping. The configured cleaner
    names are printed once after the file has been opened.

    `data_dir` is accepted but not used by this function.
    """
    metadata_files = get_files(path, extension='.csv')
    assert len(metadata_files) == 1

    wavs = []
    with open(metadata_files[0], encoding='utf-8') as metadata:
        print("Cleaner : {} \n".format(hp.tts_cleaner_names))
        for row in metadata:
            fields = row.split('|')
            # Upper-casing cannot empty a string, so stripping alone
            # decides whether the row has text.
            if fields[-1].strip():
                wavs.append(fields[0].strip())
    return wavs
def ljspeech(path: Union[str, Path], dt=False):
    """
    Load an id -> text dict from a training metadata ``.csv`` under `path`.

    Among the ``.csv`` files found, the last one whose path ends with
    ``'tmp_tts_train.csv'`` (when `dt` is truthy) or
    ``'/cleaned_train_tts.csv'`` (otherwise) is selected. When none
    matches, the first file found is read instead. Lines are split on
    ``'|'``: first field is the key, last field the value (unstripped).
    """
    csv_file = get_files(path, extension='.csv')

    wanted_suffix = 'tmp_tts_train.csv' if dt else '/cleaned_train_tts.csv'
    matching = [candidate for candidate in csv_file
                if str(candidate).endswith(wanted_suffix)]
    if matching:
        # The last match wins, as with a loop that keeps reassigning.
        csv_file = [matching[-1]]

    print(f'Using {csv_file} train file ...')

    text_dict = {}
    with open(csv_file[0], encoding='utf-8') as metadata:
        for row in metadata:
            fields = row.split('|')
            text_dict[fields[0]] = fields[-1]
    return text_dict
y, mu=2**hp.bits) if hp.mu_law else float_2_label(y, bits=hp.bits) elif hp.voc_mode == 'MOL': quant = float_2_label(y, bits=16) return mel.astype(np.float32), quant.astype(np.int64) def process_wav(path: Path): wav_id = path.stem m, x = convert_file(path) np.save(paths.mel / f'{wav_id}.npy', m, allow_pickle=False) np.save(paths.quant / f'{wav_id}.npy', x, allow_pickle=False) return wav_id, m.shape[-1] wav_files = get_files(path, extension) paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id) print(f'\n{len(wav_files)} {extension[1:]} files found in "{path}"\n') if len(wav_files) == 0: print('Please point wav_path in hparams.py to your dataset,') print('or use the --path option.\n') else: text_dict = ljspeech(path) with open(paths.data / 'text_dict.pkl', 'wb') as f: pickle.dump(text_dict, f) n_workers = max(1, args.num_workers)
def ljspeech(path: Union[str, Path]):
    """
    Build an id -> text dict from every ``.txt`` file found under `path`.

    Tab, newline and NUL characters are removed from all parsed fields.
    Each line falls into one of these cases:

    * It contains a tab: the first tab-separated field is the key and the
      last one is the text (counted as ``first_case``).
    * No tab and more than two ``'_'``-separated parts: the key is
      ``<part0>_<part1>_<leading digits of part2>`` and the text is the
      rest of part2. Accepted only when that rest is non-empty.
    * No tab and at most two ``'_'``-separated parts: the key is
      ``<file name without its last 4 chars>_<running number per file>``.
      The line is accepted only when it splits on ``'.'`` into exactly two
      pieces, the second piece being the text.
    * Empty after cleaning: counted as ``zero_length`` and skipped.

    Per-case counters are printed before returning.

    The file currently being read is tracked in its own loop variable.
    Reusing one index for both the file list and the fields of a line lets
    the field loop overwrite the file index, so file-derived keys could
    name the wrong file or raise ``IndexError``.
    """
    transcript_files = get_files(path, extension='.txt')
    text_dict = {}
    junk_chars = ('\t', '\n', '\x00')

    def _scrub(text):
        # Remove every junk character from `text`.
        for junk in junk_chars:
            text = text.replace(junk, '')
        return text

    first_case = 0
    lengthzero_underscore_reject = 0
    key_no_match = 0
    lengthzero_underscore_accept = 0
    zero_length = 0
    key_no_match_outlier = 0
    file_matching_counter = {}

    for transcript_file in transcript_files:
        with open(transcript_file, encoding='utf-8') as f:
            for line in f:
                fields = line.split('\t')  # tab between key and text

                if len(fields) > 1:
                    fields = [_scrub(field) for field in fields]
                    text_dict[fields[0]] = fields[-1]
                    first_case += 1
                    continue

                entry = _scrub(fields[0])
                if not entry:
                    zero_length += 1
                    continue

                underscore_split = entry.split('_')
                if len(underscore_split) > 2:
                    # Key is embedded in the line: the third part starts
                    # with the numeric id, directly followed by the text.
                    key_text_mix = underscore_split[2]
                    current_key = ''
                    for ch in key_text_mix:
                        if not ch.isdigit():
                            break
                        current_key += ch
                    key = '_'.join(
                        (underscore_split[0], underscore_split[1], current_key))
                    text = key_text_mix[len(current_key):]
                    if text:
                        text_dict[key] = text
                        lengthzero_underscore_accept += 1
                    else:
                        lengthzero_underscore_reject += 1
                else:
                    # No usable key in the line: derive one from the name
                    # of the file being read plus a per-file counter.
                    key_from_file = str(transcript_file).split('/')[-1][:-4]
                    file_matching_counter[key_from_file] = (
                        file_matching_counter.get(key_from_file, 0) + 1)
                    current_key = (key_from_file + '_'
                                   + str(file_matching_counter[key_from_file]))
                    dot_split = entry.split('.')
                    if len(dot_split) == 2:
                        text_dict[current_key] = dot_split[-1]
                        key_no_match += 1
                    else:
                        key_no_match_outlier += 1

    print('first_case:', first_case)
    print('key_no_match:', key_no_match)
    print('lengthzero_underscore_reject:', lengthzero_underscore_reject)
    print('lengthzero_underscore_accept:', lengthzero_underscore_accept)
    print('zero_length:', zero_length)
    print('key_no_match_outlier:', key_no_match_outlier)
    print('total:', first_case + key_no_match + lengthzero_underscore_reject
          + lengthzero_underscore_accept + key_no_match_outlier)
    return text_dict
import os

import numpy as np

import hparams as hp
from utils.files import get_files

if __name__ == '__main__':
    # Per-file statistics collected over the energy and pitch arrays.
    min_e = []
    min_p = []
    max_e = []
    max_p = []
    nz_min_p = []
    nz_min_e = []

    energy_path = os.path.join(hp.data_dir, 'energy')
    pitch_path = os.path.join(hp.data_dir, 'pitch')
    mel_path = os.path.join(hp.data_dir, 'mels')

    energy_files = get_files(energy_path, extension='.npy')
    pitch_files = get_files(pitch_path, extension='.npy')
    mel_files = get_files(mel_path, extension='.npy')

    # All three feature directories must hold the same number of files.
    assert len(energy_files) == len(pitch_files) == len(mel_files)

    for f in energy_files:
        e = np.load(f)
        min_e.append(e.min())
        # NOTE(review): raises ValueError when a file has no positive
        # value (empty selection) - confirm this cannot happen.
        nz_min_e.append(e[e > 0].min())
        max_e.append(e.max())

    for f in pitch_files:
        p = np.load(f)
        min_p.append(p.min())
        # NOTE(review): same empty-selection caveat as for energy.
        nz_min_p.append(p[p > 0].min())
def process_wav_test(input_paths):
    """
    Convert one clean/noisy test pair and store its mel and quant arrays.

    `input_paths` is a ``(clean_path, noisy_path)`` pair whose file stems
    must be equal, otherwise ``RuntimeError`` is raised. The arrays
    returned by ``convert_file`` are saved as ``<stem>.npy`` under
    ``paths.test_mel`` and ``paths.test_quant``.

    Returns the stem and the size of the last axis of the mel array.
    """
    clean_path, noisy_path = input_paths
    print("Processing -> {0} vs {1}".format(clean_path, noisy_path))

    wav_id = clean_path.stem
    if wav_id != noisy_path.stem:
        raise RuntimeError("Those are different samples!{0} vs {1}".format(
            clean_path, noisy_path))

    mel, quant = convert_file(clean_path, noisy_path)
    target_name = f'{wav_id}.npy'
    np.save(paths.test_mel / target_name, mel, allow_pickle=False)
    np.save(paths.test_quant / target_name, quant, allow_pickle=False)
    return wav_id, mel.shape[-1]


# Clean and noisy listings are both ordered by stem.
wav_files_clean = sorted(get_files(path_clean, extension),
                         key=lambda p: p.stem)
wav_files_noisy = sorted(get_files(path_noisy, extension),
                         key=lambda p: p.stem)

# The test listings stay None when no test path was given.
wav_files_clean_test = None
if path_clean_test is not None:
    wav_files_clean_test = sorted(get_files(path_clean_test, extension),
                                  key=lambda p: p.stem)

wav_files_noisy_test = None
if path_noisy_test is not None:
    wav_files_noisy_test = sorted(get_files(path_noisy_test, extension),
                                  key=lambda p: p.stem)

paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id)

print(
    f'\n{len(wav_files_clean)} {extension[1:]} files found in "{path_clean}"\n'
)
else: raise ValueError(f'Unexpected voc mode {self.dsp.voc_mode}, should be either RAW or MOL.') return mel.astype(np.float32), quant.astype(np.int64), pitch.astype(np.float32) parser = argparse.ArgumentParser(description='Preprocessing for WaveRNN and Tacotron') parser.add_argument('--path', '-p', help='directly point to dataset path') parser.add_argument('--num_workers', '-w', metavar='N', type=valid_n_workers, default=cpu_count()-1, help='The number of worker threads to use for preprocessing') parser.add_argument('--config', metavar='FILE', default='config.yaml', help='The config containing all hyperparams.') args = parser.parse_args() if __name__ == '__main__': config = read_config(args.config) wav_files = get_files(args.path, '.wav') wav_ids = {w.stem for w in wav_files} paths = Paths(config['data_path'], config['voc_model_id'], config['tts_model_id']) print(f'\n{len(wav_files)} .wav files found in "{args.path}"') assert len(wav_files) > 0, f'Found no wav files in {args.path}, exiting.' text_dict = ljspeech(args.path) text_dict = {item_id: text for item_id, text in text_dict.items() if item_id in wav_ids and len(text) > config['preprocessing']['min_text_len']} wav_files = [w for w in wav_files if w.stem in text_dict] print(f'Using {len(wav_files)} wav files that are indexed in metafile.\n') n_workers = max(1, args.num_workers) dsp = DSP.from_config(config)
y, mu=2**hp.bits) if hp.mu_law else float_2_label(y, bits=hp.bits) elif hp.voc_mode == 'MOL': quant = float_2_label(y, bits=16) return mel.astype(np.float32), quant.astype(np.int64) def process_wav(path: Path): wav_id = path.split("/")[-1] m, x = convert_file(path) np.save(paths.mel / f'{wav_id}.npy', m, allow_pickle=False) np.save(paths.quant / f'{wav_id}.npy', x, allow_pickle=False) return wav_id, m.shape[-1] wav_files = get_files(path, hp.book_names, hp.metadata, extension) paths = Paths(hp.data_path, hp.voc_model_id, hp.tts_model_id) print(wav_files[0]) print(f'\n{len(wav_files)} {extension[1:]} files found in "{path}"\n') if len(wav_files) == 0: print('Please point wav_path in hparams.py to your dataset,') print('or use the --path option.\n') else: if not hp.ignore_tts: text_dict = blizzard(path, hp.book_names, hp.metadata)
def read_action(path):
    """
    Load every frame listed for `path` as a grayscale image.

    The frame names come from ``files.get_files(path, append=True)`` and
    each one is read with ``cv2.imread`` in ``IMREAD_GRAYSCALE`` mode.

    Returns the list of loaded frames and the list of frame names.
    """
    frame_names = files.get_files(path, append=True)
    frames = []
    for frame_name in frame_names:
        frames.append(cv2.imread(frame_name, cv2.IMREAD_GRAYSCALE))
    return frames, frame_names