Ejemplo n.º 1
0
    def __init__(self):
        """Build the speaker list, reorder it via ``self.shuffle`` and load the data.

        Speaker ids are characters 1-3 of every entry name found under
        ``<hparams.VCTK_dataset_path>/wav48``.  A list of integer indices is
        either generated (shuffled, truncated to
        ``hparams.shuffle_speaker_num`` and saved one per line to
        ``hparams.shuffle_speaker_file``) or read back from that file when it
        already exists.  The indices are handed to ``self.shuffle`` together
        with the speaker list, and the result is used to collect
        transcriptions (``self.text``) and wav paths (``self.wav_paths``).
        """
        wav_root = os.path.join(hparams.VCTK_dataset_path, "wav48")
        # NOTE(review): os.listdir gives no ordering guarantee, so the saved
        # indices presumably rely on the listing order staying stable - confirm.
        self.speaker_list = [str(entry[1:4]) for entry in os.listdir(wav_root)]

        # Shuffle Speakers
        if os.path.exists(hparams.shuffle_speaker_file):
            # Reuse the indices written by an earlier run.
            with open(hparams.shuffle_speaker_file, "r") as f:
                picked = [int(line) for line in f.readlines()]

            self.speaker_list = self.shuffle(self.speaker_list, picked)
        else:
            picked = list(range(len(self.speaker_list)))
            random.shuffle(picked)
            picked = picked[0:hparams.shuffle_speaker_num]
            self.speaker_list = self.shuffle(self.speaker_list, picked)

            # Persist the indices so that later runs take the branch above.
            with open(hparams.shuffle_speaker_file, "w") as f:
                f.writelines(str(position) + "\n" for position in picked)

        transcript_source = vctk.TranscriptionDataSource(
            hparams.VCTK_dataset_path, speakers=self.speaker_list)
        transcriptions = transcript_source.collect_files()
        wav_source = vctk.WavFileDataSource(
            hparams.VCTK_dataset_path, speakers=self.speaker_list)
        wav_paths = wav_source.collect_files()

        self.text = transcriptions
        self.wav_paths = wav_paths
Ejemplo n.º 2
0
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Run ``_process_utterance`` on every VCTK utterance of non-excluded speakers.

    Args:
        in_dir: Root directory passed to the nnmnkwii VCTK data sources.
        out_dir: Forwarded unchanged to ``_process_utterance``.
        num_workers: Number of worker processes in the pool.
        tqdm: Callable wrapped around the list of futures (for example a
            progress bar); the default returns its argument unchanged.

    Returns:
        A list with the result of every submitted ``_process_utterance``
        call, in submission order.

    Raises:
        KeyError: If a non-empty name in ``hparams.not_for_train_speaker``
            is not one of ``vctk.available_speakers``.
    """
    speakers = vctk.available_speakers

    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    # Pair each speaker name with one distinct label value.
    # NOTE(review): assumes the order of ``available_speakers`` matches the
    # sorted order of the unique labels - confirm against nnmnkwii.
    speaker_to_speaker_id = dict(zip(speakers, np.unique(speaker_ids)))
    wav_paths = vctk.WavFileDataSource(in_dir,
                                       speakers=speakers).collect_files()

    # "".split(", ") yields [""]; skipping blank names lets an empty setting
    # mean "exclude nobody" instead of raising KeyError.
    ignore_speaker = {
        speaker_to_speaker_id[name]
        for name in hparams.not_for_train_speaker.split(", ")
        if name
    }

    # The context manager shuts the pool down even when a worker raises;
    # previously the executor was never shut down.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = [
            executor.submit(_process_utterance, out_dir, index + 1,
                            speaker_id, wav_path, text)
            for index, (speaker_id, text, wav_path) in enumerate(
                zip(speaker_ids, transcriptions, wav_paths))
            if speaker_id not in ignore_speaker
        ]
        return [future.result() for future in tqdm(futures)]
Ejemplo n.º 3
0
Archivo: test.py Proyecto: zhf459/GE2E
 def __init__(self, data_root='/home/zeng/work/data/VCTK-Corpus/'):
     """Collect VCTK wav paths and group them by the first ten sorted speakers.

     The speaker list of the data source is sorted in place; its first ten
     entries are split 90/10 into ``train_speaker`` and ``test_speaker``.
     ``datasets`` maps each of those speaker ids to a ``SpeakerDatasets``
     built from the paths that contain ``'p' + id``.
     """
     super(VCTKDatasets, self).__init__()
     wav_source = vctk.WavFileDataSource(data_root)
     self.paths = wav_source.collect_files()
     wav_source.speakers.sort()
     self.speakers = wav_source.speakers[:10]

     # Same cut point for both halves of the train/test split.
     self.train_speaker = self.speakers[:int(len(self.speakers) * 0.9)]
     self.test_speaker = self.speakers[int(len(self.speakers) * 0.9):]

     per_speaker = {}
     for _id in self.speakers:
         own_paths = [path for path in self.paths if 'p' + _id in path]
         per_speaker[_id] = SpeakerDatasets(_id, own_paths)
     self.datasets = per_speaker
Ejemplo n.º 4
0
def preprocess():
    """Name a spectrogram file for every VCTK wav and save them in batches.

    Each wav file gets the name ``"<speaker index>_<utterance index>.npy"``.
    The speaker index counts distinct speaker numbers (characters 1-3 of the
    file name) in order of first appearance; the utterance index counts
    distinct utterance numbers (characters 5-7) seen since the most recent
    new speaker.  Wav paths and names are then handed to ``save_by_list``
    in groups of at most 1000, with progress printed after each group.
    """
    speakers = vctk.available_speakers
    wav_paths = vctk.WavFileDataSource(
        hp.origin_data, speakers=speakers).collect_files()

    # exist_ok replaces the racy exists() + mkdir() pair.
    os.makedirs("dataset", exist_ok=True)

    batch_size = 1000
    speaker_index = -1
    utterance_index = -1
    # Sets give O(1) membership tests; only membership was ever used.
    seen_speakers = set()
    seen_utterances = set()

    mel_spec_list = []

    for wav_file in wav_paths:
        base_name = os.path.basename(wav_file)
        speaker_id = int(base_name[1:4])
        utterance_id = int(base_name[5:8])

        if speaker_id not in seen_speakers:
            # First file of a new speaker: restart the utterance numbering.
            seen_speakers.add(speaker_id)
            speaker_index += 1
            seen_utterances.clear()
            utterance_index = -1

        if utterance_id not in seen_utterances:
            seen_utterances.add(utterance_id)
            utterance_index += 1

        mel_spec_list.append(
            str(speaker_index) + "_" + str(utterance_index) + ".npy")

    wav_batch = []
    name_batch = []
    total_len = len(wav_paths)

    for ind, wav_file in enumerate(wav_paths):
        wav_batch.append(wav_file)
        name_batch.append(mel_spec_list[ind])

        # Flush on every full batch and on the final, possibly short, one.
        if (ind + 1) % batch_size == 0 or ind == total_len - 1:
            save_by_list(wav_batch, name_batch)
            # Fresh lists: the callee keeps the ones it was given.
            wav_batch = []
            name_batch = []
            print("Done", ind + 1)

    print("Done")
Ejemplo n.º 5
0
    def __init__(self, dataset_path=hparams.dataset_path):
        """Collect VCTK transcriptions and wav paths for every listed speaker.

        Speaker ids are characters 1-3 of each entry name under
        ``<hparams.VCTK_dataset_path>/wav48``.

        Args:
            dataset_path: Directory stored as ``self.dataset_path``;
                ``self.text_path`` points at ``train.txt`` inside it.
        """
        wav_root = os.path.join(hparams.VCTK_dataset_path, "wav48")
        self.speaker_list = [str(entry[1:4]) for entry in os.listdir(wav_root)]

        transcript_source = vctk.TranscriptionDataSource(
            hparams.VCTK_dataset_path, speakers=self.speaker_list)
        transcriptions = transcript_source.collect_files()
        wav_source = vctk.WavFileDataSource(
            hparams.VCTK_dataset_path, speakers=self.speaker_list)
        wav_paths = wav_source.collect_files()

        self.dataset_path = dataset_path
        self.text_path = os.path.join(self.dataset_path, "train.txt")
        self.text = transcriptions
        self.wav_paths = wav_paths
Ejemplo n.º 6
0
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Run ``_process_utterance`` on every VCTK utterance in a process pool.

    Args:
        in_dir: Root directory passed to the nnmnkwii VCTK data sources.
        out_dir: Forwarded unchanged to ``_process_utterance``.
        num_workers: Number of worker processes in the pool.
        tqdm: Callable wrapped around the list of futures (for example a
            progress bar); the default returns its argument unchanged.

    Returns:
        A list with the result of every ``_process_utterance`` call, in
        submission order.
    """
    speakers = vctk.available_speakers

    td = vctk.TranscriptionDataSource(in_dir, speakers=speakers)
    transcriptions = td.collect_files()
    speaker_ids = td.labels
    wav_paths = vctk.WavFileDataSource(
        in_dir, speakers=speakers).collect_files()

    # The context manager shuts the pool down even when a worker raises;
    # previously the executor was never shut down.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = [
            # index + 1: utterances are numbered from 1.
            executor.submit(_process_utterance, out_dir, index + 1,
                            speaker_id, wav_path, text)
            for index, (speaker_id, text, wav_path) in enumerate(
                zip(speaker_ids, transcriptions, wav_paths))
        ]
        return [future.result() for future in tqdm(futures)]
Ejemplo n.º 7
0
Archivo: train.py Proyecto: zhf459/GE2E
 def __init__(self, vctk_path='/home/zeng/work/data/VCTK-Corpus/',
              data_root='data',
              mode='TD-SV'):
     """Create one dataset and one shuffling DataLoader per VCTK speaker.

     Args:
         vctk_path: Corpus root given to ``vctk.WavFileDataSource``.
         data_root: Root given to every ``_NPYDataSource``.
         mode: ``'TD-SV'`` passes ``text_index=None`` to the data sources;
             any other value passes ``'001'``.
     """
     self.data_root = data_root
     self.speakers = vctk.WavFileDataSource(data_root=vctk_path)

     # Keys of the dictionaries below are the label-map keys prefixed with 'p'.
     sources = {}
     for speaker_id in self.speakers.labelmap.keys():
         npy_source = _NPYDataSource(
             data_root=data_root,
             col='',
             speaker_id='p' + speaker_id,
             text_index=None if mode == 'TD-SV' else '001',
             train=True,
             test_size=0.1,
         )
         sources['p' + speaker_id] = PyTorchDataset_TISV(npy_source)
     self.speakerDataSource = sources

     loaders = {}
     for _id, dataset in self.speakerDataSource.items():
         loaders[_id] = data_utils.DataLoader(
             dataset=dataset, batch_size=hparams.M, shuffle=True)
     self.speakerDataLoader = loaders

     # Position -> speaker key, in the iteration order of speakerDataSource.
     self.speakerDataSource_index = dict(enumerate(self.speakerDataSource))
Ejemplo n.º 8
0
from nnmnkwii.datasets import vctk

# Build a VCTK wav data source on the hard-coded corpus root and print
# whatever collect_files() returns for it.
vctk_dataset = vctk.WavFileDataSource('/home/zeng/work/data/VCTK-Corpus/')
print(vctk_dataset.collect_files())
    # Parse the command line; `parser` is created outside this excerpt.
    args = parser.parse_args()

    # Use the upper-cased args.log value as the root logger level.
    log_level = args.log.upper()
    logging.getLogger().setLevel(log_level)
    # NOTE(review): not referenced in the visible part of this block.
    disfluencies = set(['uh', 'um'])

    data_root = args.data_root

    # Process every speaker nnmnkwii lists for VCTK.
    speakers = vctk.available_speakers

    # Collect all transcripts and wav files for those speakers.
    td = vctk.TranscriptionDataSource(data_root, speakers=speakers)
    transcriptions = td.collect_files()
    wav_paths = vctk.WavFileDataSource(data_root,
                                       speakers=speakers).collect_files()

    # Make sure the output directory <data_root>/lab exists.
    save_dir = join(data_root, "lab")
    if not exists(save_dir):
        os.makedirs(save_dir)

    # NOTE(review): not referenced in the visible part of this block.
    resources = gentle.Resources()

    # Walk the wav files by index, pairing each with its transcription.
    for idx in tqdm(range(len(wav_paths))):
        transcript = transcriptions[idx]
        audiofile = wav_paths[idx]
        # Derive the label path from the wav path: "wav48/" -> "lab/" and
        # ".wav" -> ".lab".
        lab_path = audiofile.replace("wav48/", "lab/").replace(".wav", ".lab")
        print(transcript)
        print(audiofile)
        print(lab_path)
Ejemplo n.º 10
0
Archivo: test.py Proyecto: zhf459/GE2E
def test3():
    """Build a local ``VCTKDatasets`` helper, sample three speakers, print the first."""

    class VCTKDatasets:
        """Per-speaker datasets for the first ten sorted VCTK speakers."""

        def __init__(self, data_root='/home/zeng/work/data/VCTK-Corpus/'):
            """Collect wav paths and split the ten speakers 90/10 into train/test."""
            super().__init__()
            wav_source = vctk.WavFileDataSource(data_root)
            self.paths = wav_source.collect_files()
            wav_source.speakers.sort()
            self.speakers = wav_source.speakers[:10]
            self.train_speaker = self.speakers[:int(len(self.speakers) * 0.9)]
            self.test_speaker = self.speakers[int(len(self.speakers) * 0.9):]

            # One SpeakerDatasets per id, fed the paths containing 'p' + id.
            per_speaker = {}
            for _id in self.speakers:
                own_paths = [path for path in self.paths if 'p' + _id in path]
                per_speaker[_id] = SpeakerDatasets(_id, own_paths)
            self.datasets = per_speaker

        def sample_speakers(self, N, train=True):
            """Return the datasets of N randomly sampled train (or test) speakers."""
            if train:
                pool = self.train_speaker
            else:
                pool = self.test_speaker
            return [self.datasets[_id] for _id in random.sample(pool, N)]

    vctk_datasets = VCTKDatasets()
    dsets = vctk_datasets.sample_speakers(3)[0]
    print(dsets)


# Module-level check: collect every VCTK wav path, print them, then build a
# SpeakerDatasets from the paths that contain 'p295'.
data_sources = vctk.WavFileDataSource(
    data_root='/home/zeng/work/data/VCTK-Corpus/')
speakers = data_sources.speakers
files = data_sources.collect_files()
print(files)
speaker_path = [wav_path for wav_path in files if 'p295' in wav_path]
sd = SpeakerDatasets('p295', speaker_path)
Ejemplo n.º 11
0
import os
from nnmnkwii.datasets import vctk
import hparams as hp

# speakers = vctk.available_speakers
# print(speakers)
# print(len(speakers))

# Speaker ids are characters 1-3 of each entry name under <vctk_processed>/wav48.
speakers = [
    str(entry[1:4])
    for entry in os.listdir(os.path.join(hp.vctk_processed, "wav48"))
]

td = vctk.TranscriptionDataSource(hp.vctk_processed, speakers=speakers)
transcriptions = td.collect_files()
wav_paths = vctk.WavFileDataSource(
    hp.vctk_processed, speakers=speakers).collect_files()

# Spot-check one hard-coded position in both collections.
print(transcriptions[32306])
print(wav_paths[32306])
Ejemplo n.º 12
0
def prepare_txt_dict():
    """Convert every VCTK transcription with g2p and write ``words_dict.txt``.

    Steps:
      1. Collect transcriptions and wav paths for all available speakers.
      2. Derive one output name per wav file: ``processed/<wav stem>.txt``.
      3. Strip every character that is neither a letter nor a space from
         each transcription and run ``g2p.g2p`` on the result.
      4. Call ``prepare_txt(name, g2p_output)`` in a process pool and merge
         the dictionaries it returns.
      5. Write the merged dictionary to ``words_dict.txt``, one
         ``"<key>    <space-separated values>"`` line per entry.
    """
    speakers = vctk.available_speakers
    td = vctk.TranscriptionDataSource(hp.vctk_path, speakers=speakers)
    transcriptions = td.collect_files()
    wav_paths = vctk.WavFileDataSource(hp.vctk_path,
                                       speakers=speakers).collect_files()

    # exist_ok replaces the racy exists() + mkdir() pair.
    os.makedirs("processed", exist_ok=True)

    # splitext drops the real extension instead of blindly cutting the last
    # four characters of the file name.
    save_name_list = [
        os.path.join(
            "processed",
            os.path.splitext(os.path.basename(wav_path))[0] + ".txt")
        for wav_path in wav_paths
    ]
    print("Get Name Done.")

    lists_P = []

    with g2p.Session():
        for i, text in enumerate(transcriptions):
            # Positions of characters that are neither letters nor spaces.
            list_not_alpha = [
                ind for ind, ele in enumerate(text)
                if not ele.isalpha() and ele != ' '
            ]

            # The index is shifted left by the number of earlier calls.
            # NOTE(review): presumably delete_alpha_str removes the single
            # character at the given index - confirm against its definition.
            for cnt, ind in enumerate(list_not_alpha):
                text = delete_alpha_str(text, ind - cnt)

            lists_P.append(g2p.g2p(text))

            if i % 100 == 0:
                print(i)

    print("Get P Done.")

    words_dict = {}

    # The context manager shuts the pool down even when a worker raises;
    # previously the executor was never shut down.
    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
        # Indexing (not zip) keeps the IndexError when there are more
        # transcriptions than wav files.
        futures = [
            executor.submit(prepare_txt, save_name_list[ind], list_P)
            for ind, list_P in enumerate(lists_P)
        ]

        print("Prepare Done.")

        for future in futures:
            words_dict.update(future.result())

    with open("words_dict.txt", "w") as f:
        for key, values in words_dict.items():
            # Every value is followed by one space, including the last one.
            temp_str_P = "".join(P + " " for P in values)
            f.write(key + "    " + temp_str_P + "\n")