def __init__(self):
    """Collect VCTK speaker ids, apply a persisted random speaker shuffle,
    and gather transcription / wav file lists for the chosen speakers."""
    wav48_dir = os.path.join(hparams.VCTK_dataset_path, "wav48")
    # Directory entries look like "p225"; chars 1..3 are the speaker id.
    self.speaker_list = [str(entry[1:4]) for entry in os.listdir(wav48_dir)]

    # Shuffle Speakers
    if not os.path.exists(hparams.shuffle_speaker_file):
        # First run: draw a fresh random ordering, keep the first
        # `shuffle_speaker_num` indices, and persist them for reuse.
        shuffle_list = list(range(len(self.speaker_list)))
        random.shuffle(shuffle_list)
        shuffle_list = shuffle_list[0:hparams.shuffle_speaker_num]
        self.speaker_list = self.shuffle(self.speaker_list, shuffle_list)
        with open(hparams.shuffle_speaker_file, "w") as f:
            for index in shuffle_list:
                f.write(str(index) + "\n")
    else:
        # Reuse the index ordering saved by a previous run.
        with open(hparams.shuffle_speaker_file, "r") as f:
            shuffle_list = [int(line) for line in f.readlines()]
        self.speaker_list = self.shuffle(self.speaker_list, shuffle_list)

    source = vctk.TranscriptionDataSource(hparams.VCTK_dataset_path,
                                          speakers=self.speaker_list)
    self.text = source.collect_files()
    self.wav_paths = vctk.WavFileDataSource(
        hparams.VCTK_dataset_path,
        speakers=self.speaker_list).collect_files()
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Schedule feature extraction for every VCTK utterance, skipping the
    speakers named in ``hparams.not_for_train_speaker``.

    Args:
        in_dir: VCTK corpus root.
        out_dir: destination directory passed to ``_process_utterance``.
        num_workers: size of the process pool.
        tqdm: optional progress wrapper over the futures list.

    Returns:
        List of per-utterance results, in submission order.
    """
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []

    speakers = vctk.available_speakers
    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    # Map speaker name -> numeric label.
    # NOTE(review): relies on np.unique's sorted output lining up with the
    # order of `speakers` — verify this holds for the data source in use.
    speaker_to_speaker_id = dict(zip(speakers, np.unique(speaker_ids)))
    wav_paths = vctk.WavFileDataSource(in_dir, speakers=speakers).collect_files()

    excluded_names = hparams.not_for_train_speaker.split(", ")
    ignore_speaker = {speaker_to_speaker_id[name] for name in excluded_names}

    for index, (speaker_id, text, wav_path) in enumerate(
            zip(speaker_ids, transcriptions, wav_paths)):
        if speaker_id in ignore_speaker:
            continue
        futures.append(
            executor.submit(
                partial(_process_utterance, out_dir, index + 1, speaker_id,
                        wav_path, text)))
    return [future.result() for future in tqdm(futures)]
def __init__(self, data_root='/home/zeng/work/data/VCTK-Corpus/'):
    """Index the first ten VCTK speakers and split them 90/10 train/test,
    building one SpeakerDatasets per speaker from that speaker's wav paths."""
    super(VCTKDatasets, self).__init__()
    source = vctk.WavFileDataSource(data_root)
    self.paths = source.collect_files()
    source.speakers.sort()
    self.speakers = source.speakers[:10]
    split = int(len(self.speakers) * 0.9)
    self.train_speaker = self.speakers[:split]
    self.test_speaker = self.speakers[split:]
    # Each speaker gets only the wav paths containing its "p<ID>" tag.
    self.datasets = {}
    for _id in self.speakers:
        speaker_paths = [p for p in self.paths if 'p' + _id in p]
        self.datasets[_id] = SpeakerDatasets(_id, speaker_paths)
def preprocess():
    """Assign each VCTK wav a target name "<speaker_idx>_<utt_idx>.npy" and
    run mel-spectrogram extraction over the corpus in batches of 1000."""
    speakers = vctk.available_speakers
    wav_paths = vctk.WavFileDataSource(
        hp.origin_data, speakers=speakers).collect_files()

    if not os.path.exists("dataset"):
        os.mkdir("dataset")

    # Re-number speakers and utterances with dense 0-based counters, keyed
    # off the "pSSS_UUU.wav" basename layout.
    speaker_index = -1
    utt_index = -1
    seen_speakers = list()
    seen_utts = list()
    mel_spec_list = list()
    for wav_file in wav_paths:
        base_name = os.path.basename(wav_file)
        speaker_id = int(base_name[1:4])
        utt_id = int(base_name[5:8])
        if speaker_id not in seen_speakers:
            # New speaker: advance speaker counter, restart utterance count.
            seen_speakers.append(speaker_id)
            speaker_index = speaker_index + 1
            seen_utts.clear()
            utt_index = -1
        if utt_id not in seen_utts:
            seen_utts.append(utt_id)
            utt_index = utt_index + 1
        mel_spec_list.append(str(speaker_index) + "_" + str(utt_index) + ".npy")

    # Feed save_by_list in chunks of 1000 files (a multi-process variant was
    # previously used here, see git history).
    wav_batch = list()
    name_batch = list()
    total_len = len(wav_paths)
    for ind, wav_file in enumerate(wav_paths):
        wav_batch.append(wav_file)
        name_batch.append(mel_spec_list[ind])
        if (((ind + 1) % 1000) == 0) or (ind == total_len - 1):
            save_by_list(wav_batch.copy(), name_batch.copy())
            wav_batch.clear()
            name_batch.clear()
            print("Done", ind + 1)
    print("Done")
def __init__(self, dataset_path=hparams.dataset_path):
    """List VCTK speakers from the wav48/ directory and collect the matching
    transcription and wav file lists.

    Args:
        dataset_path: root directory that will hold "train.txt".
    """
    wav48_dir = os.path.join(hparams.VCTK_dataset_path, "wav48")
    # Entries look like "p225"; chars 1..3 are the numeric speaker id.
    self.speaker_list = [str(entry[1:4]) for entry in os.listdir(wav48_dir)]

    source = vctk.TranscriptionDataSource(hparams.VCTK_dataset_path,
                                          speakers=self.speaker_list)
    self.dataset_path = dataset_path
    self.text_path = os.path.join(self.dataset_path, "train.txt")
    self.text = source.collect_files()
    self.wav_paths = vctk.WavFileDataSource(
        hparams.VCTK_dataset_path,
        speakers=self.speaker_list).collect_files()
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Submit one _process_utterance job per VCTK utterance and collect the
    results in submission order.

    Args:
        in_dir: VCTK corpus root.
        out_dir: destination directory passed to ``_process_utterance``.
        num_workers: size of the process pool.
        tqdm: optional progress wrapper over the futures list.
    """
    pool = ProcessPoolExecutor(max_workers=num_workers)
    speakers = vctk.available_speakers
    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    wav_paths = vctk.WavFileDataSource(
        in_dir, speakers=speakers).collect_files()

    # Utterance indices are 1-based to match the on-disk naming convention.
    jobs = [
        pool.submit(partial(_process_utterance, out_dir, index + 1,
                            speaker_id, wav_path, text))
        for index, (speaker_id, text, wav_path) in enumerate(
            zip(speaker_ids, transcriptions, wav_paths))
    ]
    return [job.result() for job in tqdm(jobs)]
def __init__(self, vctk_path='/home/zeng/work/data/VCTK-Corpus/', data_root='data', mode='TD-SV'):
    """Build one dataset and one shuffled DataLoader per VCTK speaker for
    speaker-verification training.

    Args:
        vctk_path: VCTK corpus root (used only to enumerate speakers).
        data_root: root of the preprocessed .npy features.
        mode: 'TD-SV' uses every utterance (text_index=None); any other
            value restricts each speaker to utterance '001'.
    """
    self.data_root = data_root
    self.speakers = vctk.WavFileDataSource(data_root=vctk_path)
    speaker_ids = self.speakers.labelmap.keys()
    text_index = None if mode == 'TD-SV' else '001'

    # One PyTorchDataset_TISV per speaker, keyed "p<ID>".
    self.speakerDataSource = {}
    for speaker_id in speaker_ids:
        key = 'p' + speaker_id
        self.speakerDataSource[key] = PyTorchDataset_TISV(
            _NPYDataSource(data_root=data_root,
                           col='',
                           speaker_id=key,
                           text_index=text_index,
                           train=True,
                           test_size=0.1))

    # hparams.M utterances per speaker per batch.
    self.speakerDataLoader = {
        key: data_utils.DataLoader(dataset=dataset,
                                   batch_size=hparams.M,
                                   shuffle=True)
        for key, dataset in self.speakerDataSource.items()
    }
    # Positional index -> speaker key, for sampling speakers by number.
    self.speakerDataSource_index = dict(enumerate(self.speakerDataSource.keys()))
from nnmnkwii.datasets import vctk

# Quick smoke check: print every wav path found in the VCTK corpus.
dataset = vctk.WavFileDataSource('/home/zeng/work/data/VCTK-Corpus/')
print(dataset.collect_files())
# Script body: parse CLI args, collect VCTK transcripts/wavs, and derive the
# per-utterance .lab output paths (printed for inspection).
args = parser.parse_args()
log_level = args.log.upper()
logging.getLogger().setLevel(log_level)

# Filler words — presumably consumed by the gentle aligner; confirm against
# the rest of the script.
disfluencies = set(['uh', 'um'])
data_root = args.data_root

# Do for all speakers
speakers = vctk.available_speakers

# Collect all transcripts/wav files
td = vctk.TranscriptionDataSource(data_root, speakers=speakers)
transcriptions = td.collect_files()
wav_paths = vctk.WavFileDataSource(data_root, speakers=speakers).collect_files()

# Save dir
save_dir = join(data_root, "lab")
if not exists(save_dir):
    os.makedirs(save_dir)

# NOTE(review): `resources` is unused in this chunk — it looks intended for a
# gentle forced-alignment call further down; verify.
resources = gentle.Resources()

for idx in tqdm(range(len(wav_paths))):
    transcript = transcriptions[idx]
    audiofile = wav_paths[idx]
    # Mirror each wav path into the lab/ tree: wav48/x.wav -> lab/x.lab
    lab_path = audiofile.replace("wav48/", "lab/").replace(".wav", ".lab")
    print(transcript)
    print(audiofile)
    print(lab_path)
def test3():
    """Smoke-test: sample speakers via VCTKDatasets and build one
    SpeakerDatasets directly for speaker p295."""

    class VCTKDatasets():
        def __init__(self, data_root='/home/zeng/work/data/VCTK-Corpus/'):
            """Index the first ten speakers, split 90/10, one dataset each."""
            super(VCTKDatasets, self).__init__()
            source = vctk.WavFileDataSource(data_root)
            self.paths = source.collect_files()
            source.speakers.sort()
            self.speakers = source.speakers[:10]
            split = int(len(self.speakers) * 0.9)
            self.train_speaker = self.speakers[:split]
            self.test_speaker = self.speakers[split:]
            self.datasets = {
                _id: SpeakerDatasets(
                    _id, [p for p in self.paths if 'p' + _id in p])
                for _id in self.speakers
            }

        def sample_speakers(self, N, train=True):
            """Return N randomly chosen per-speaker datasets."""
            pool = self.train_speaker if train else self.test_speaker
            return [self.datasets[_id] for _id in random.sample(pool, N)]

    vctk_datasets = VCTKDatasets()
    dsets = vctk_datasets.sample_speakers(3)[0]
    print(dsets)

    # Direct construction of a single-speaker dataset for comparison.
    data_sources = vctk.WavFileDataSource(
        data_root='/home/zeng/work/data/VCTK-Corpus/')
    speakers = data_sources.speakers
    files = data_sources.collect_files()
    print(files)
    speaker_path = list(filter(lambda x: 'p295' in x, files))
    sd = SpeakerDatasets('p295', speaker_path)
import os

from nnmnkwii.datasets import vctk

import hparams as hp

# Derive speaker ids from the wav48/ directory names ("p225" -> "225")
# instead of vctk.available_speakers, so extra/processed speakers are seen.
speakers = list()
for entry in os.listdir(os.path.join(hp.vctk_processed, "wav48")):
    speakers.append(str(entry[1:4]))

td = vctk.TranscriptionDataSource(hp.vctk_processed, speakers=speakers)
transcriptions = td.collect_files()
wav_paths = vctk.WavFileDataSource(hp.vctk_processed,
                                   speakers=speakers).collect_files()

# Spot-check that transcript and wav stay aligned at an arbitrary index.
print(transcriptions[32306])
print(wav_paths[32306])
def prepare_txt_dict():
    """Phonemize every VCTK transcription with g2p, write one phoneme .txt
    per utterance in parallel worker processes, and dump the merged
    word->phoneme dictionary to "words_dict.txt"."""
    speakers = vctk.available_speakers
    td = vctk.TranscriptionDataSource(hp.vctk_path, speakers=speakers)
    transcriptions = td.collect_files()
    wav_paths = vctk.WavFileDataSource(hp.vctk_path,
                                       speakers=speakers).collect_files()

    executor = ProcessPoolExecutor(max_workers=cpu_count())
    futures = list()
    save_name_list = list()

    if not os.path.exists("processed"):
        os.mkdir("processed")

    # Build "processed/<wav basename minus '.wav'>.txt" for every wav file.
    for ind in range(len(wav_paths)):
        savename = os.path.basename(
            wav_paths[ind])[0:len(os.path.basename(wav_paths[ind])) - 4] + ".txt"
        savename = os.path.join("processed", savename)
        save_name_list.append(savename)
        # print(savename)
    print("Get Name Done.")

    # Phonemize serially inside one g2p session.
    lists_P = list()
    with g2p.Session():
        for i, text in enumerate(transcriptions):
            # Collect indices of characters that are neither letters nor spaces.
            list_not_alpha = list()
            for ind, ele in enumerate(text):
                if (not ele.isalpha()) and (ele != ' '):
                    list_not_alpha.append(ind)
            # print(list_not_alpha)
            # Delete those characters; `cnt` compensates for the leftward
            # shift each earlier deletion causes (indices are ascending).
            cnt = 0
            for ind in list_not_alpha:
                text = delete_alpha_str(text, ind - cnt)
                cnt = cnt + 1
            # print(text + "######")
            list_P = g2p.g2p(text)
            lists_P.append(list_P)
            if i % 100 == 0:
                print(i)
    print("Get P Done.")

    # Write the per-utterance phoneme files in worker processes; each
    # prepare_txt call returns a word->phoneme mapping fragment.
    for ind, list_P in enumerate(lists_P):
        futures.append(
            executor.submit(partial(prepare_txt, save_name_list[ind], list_P)))
    print("Prepare Done.")

    # Merge the mapping fragments returned by the workers.
    words_dict = dict()
    for future in futures:
        # print(future.result())
        words_dict.update(future.result())

    # One line per word: "<word> <P1> <P2> ... " (note the trailing space).
    with open("words_dict.txt", "w") as f:
        for key in words_dict:
            temp_str_P = str()
            for P in words_dict[key]:
                temp_str_P = temp_str_P + P + " "
            str_write = key + " " + temp_str_P
            f.write(str_write + "\n")