Example #1
def get_mock_dataset(root_dir):
    """
    root_dir: path to the mocked dataset
    """
    mocked_data = []
    base_dir = os.path.join(root_dir, "LJSpeech-1.1")
    archive_dir = os.path.join(base_dir, "wavs")
    os.makedirs(archive_dir, exist_ok=True)
    metadata_path = os.path.join(base_dir, "metadata.csv")
    sample_rate = 22050

    with open(metadata_path, mode="w", newline='') as metadata_file:
        metadata_writer = csv.writer(metadata_file,
                                     delimiter="|",
                                     quoting=csv.QUOTE_NONE)
        for i, (transcript, normalized_transcript) in enumerate(
                zip(_TRANSCRIPTS, _NORMALIZED_TRANSCRIPT)):
            fileid = f'LJ001-{i:04d}'
            metadata_writer.writerow(
                [fileid, transcript, normalized_transcript])
            filename = fileid + ".wav"
            path = os.path.join(archive_dir, filename)
            data = get_whitenoise(sample_rate=sample_rate,
                                  duration=1,
                                  n_channels=1,
                                  dtype="int16",
                                  seed=i)
            save_wav(path, data, sample_rate)
            mocked_data.append(normalize_wav(data))
    return mocked_data, _TRANSCRIPTS, _NORMALIZED_TRANSCRIPT
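How a test might consume this mock is sketched below, assuming torchaudio.datasets.LJSPEECH with its documented (waveform, sample_rate, transcript, normalized_transcript) items, torch.testing.assert_close, and the get_mock_dataset helper above; the tolerances mirror those used in Example #5. This is an illustration, not the original test body.

import torch
from torchaudio.datasets import LJSPEECH

def check_ljspeech(root_dir):
    # Build the mock, then walk the real dataset class over the same root.
    expected_data, transcripts, normalized = get_mock_dataset(root_dir)
    dataset = LJSPEECH(root=root_dir)
    assert len(dataset) == len(expected_data)
    for i, (waveform, sample_rate, transcript, normalized_transcript) in enumerate(dataset):
        assert sample_rate == 22050
        torch.testing.assert_close(waveform, expected_data[i], atol=1e-4, rtol=1e-8)
        assert transcript == transcripts[i]
        assert normalized_transcript == normalized[i]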
Example #2
def get_mock_dataset(root_dir):
    """
    root_dir: directory of the mocked dataset
    """
    mocked_data = []
    base_dir = os.path.join(root_dir, 'LibriTTS', 'train-clean-100')
    for i, utterance_id in enumerate(_UTTERANCE_IDS):
        filename = f'{"_".join(str(u) for u in utterance_id)}.wav'
        file_dir = os.path.join(base_dir, str(utterance_id[0]), str(utterance_id[1]))
        os.makedirs(file_dir, exist_ok=True)
        path = os.path.join(file_dir, filename)

        data = get_whitenoise(sample_rate=24000, duration=2, n_channels=1, dtype='int16', seed=i)
        save_wav(path, data, 24000)
        mocked_data.append(normalize_wav(data))

        original_text_filename = f'{"_".join(str(u) for u in utterance_id)}.original.txt'
        path_original = os.path.join(file_dir, original_text_filename)
        with open(path_original, 'w') as file_:
            file_.write(_ORIGINAL_TEXT)

        normalized_text_filename = f'{"_".join(str(u) for u in utterance_id)}.normalized.txt'
        path_normalized = os.path.join(file_dir, normalized_text_filename)
        with open(path_normalized, 'w') as file_:
            file_.write(_NORMALIZED_TEXT)
    return mocked_data, _UTTERANCE_IDS, _ORIGINAL_TEXT, _NORMALIZED_TEXT
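As with the LJSpeech mock, a test can point torchaudio.datasets.LIBRITTS at the same root. A minimal sketch, assuming the dataset yields (waveform, sample_rate, original_text, normalized_text, ...) tuples as in the public API; only the constant text fields are compared so the sketch stays independent of iteration order.

from torchaudio.datasets import LIBRITTS

def check_libritts(root_dir):
    expected_data, utterance_ids, original_text, normalized_text = get_mock_dataset(root_dir)
    dataset = LIBRITTS(root=root_dir, url='train-clean-100')
    assert len(dataset) == len(expected_data)
    for waveform, sample_rate, original, normalized, *_ids in dataset:
        assert sample_rate == 24000
        assert original == original_text
        assert normalized == normalized_text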
Example #3
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        base_dir = os.path.join(cls.root_dir, "LJSpeech-1.1")
        archive_dir = os.path.join(base_dir, "wavs")
        os.makedirs(archive_dir, exist_ok=True)
        metadata_path = os.path.join(base_dir, "metadata.csv")
        sample_rate = 22050

        with open(metadata_path, mode="w", newline='') as metadata_file:
            metadata_writer = csv.writer(metadata_file,
                                         delimiter="|",
                                         quoting=csv.QUOTE_NONE)
            for i, (transcript, normalized_transcript) in enumerate(
                    zip(cls.transcripts, cls.normalized_transcripts)):
                fileid = f'LJ001-{i:04d}'
                metadata_writer.writerow(
                    [fileid, transcript, normalized_transcript])
                filename = fileid + ".wav"
                path = os.path.join(archive_dir, filename)
                data = get_whitenoise(sample_rate=sample_rate,
                                      duration=1,
                                      n_channels=1,
                                      dtype="int16",
                                      seed=i)
                save_wav(path, data, sample_rate)
                cls.data.append(normalize_wav(data))
Example #4
def get_mock_dataset(root_dir):
    """
    root_dir: directory of the mocked dataset
    """
    mocked_samples = []
    mocked_training = []
    mocked_validation = []
    mocked_testing = []
    sample_rate = 22050

    seed = 0
    for genre in gtzan.gtzan_genres:
        base_dir = os.path.join(root_dir, 'genres', genre)
        os.makedirs(base_dir, exist_ok=True)
        for i in range(100):
            filename = f'{genre}.{i:05d}'
            path = os.path.join(base_dir, f'{filename}.wav')
            data = get_whitenoise(sample_rate=sample_rate,
                                  duration=0.01,
                                  n_channels=1,
                                  dtype='int16',
                                  seed=seed)
            save_wav(path, data, sample_rate)
            sample = (normalize_wav(data), sample_rate, genre)
            mocked_samples.append(sample)
            if filename in gtzan.filtered_test:
                mocked_testing.append(sample)
            if filename in gtzan.filtered_train:
                mocked_training.append(sample)
            if filename in gtzan.filtered_valid:
                mocked_validation.append(sample)
            seed += 1
    return (mocked_samples, mocked_training, mocked_validation, mocked_testing)
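Because the mock records which files land in GTZAN's filtered train/validation/test splits, a test can also exercise subset selection. A minimal sketch, assuming GTZAN's subset argument accepts 'training', 'validation' and 'testing' as in the public torchaudio API; only lengths are compared to stay independent of iteration order.

from torchaudio.datasets import GTZAN

def check_gtzan_subsets(root_dir):
    samples, training, validation, testing = get_mock_dataset(root_dir)
    for subset, expected in [(None, samples), ('training', training),
                             ('validation', validation), ('testing', testing)]:
        dataset = GTZAN(root=root_dir, subset=subset)
        assert len(dataset) == len(expected)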
Example #5
 def assert_sphere(
     self,
     dtype,
     sample_rate,
     num_channels,
     channels_first=True,
     duration=1,
 ):
     """`soundfile_backend.load` can load SPHERE format correctly."""
     path = self.get_temp_path("reference.sph")
     num_frames = duration * sample_rate
     raw = get_wav_data(
         dtype,
         num_channels,
         num_frames=num_frames,
         normalize=False,
         channels_first=False,
     )
     soundfile.write(path,
                     raw,
                     sample_rate,
                     subtype=dtype2subtype(dtype),
                     format="NIST")
     expected = normalize_wav(raw.t() if channels_first else raw)
     data, sr = soundfile_backend.load(path, channels_first=channels_first)
     assert sr == sample_rate
     self.assertEqual(data, expected, atol=1e-4, rtol=1e-8)
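The helper above would typically be driven by small parameterized test methods; a hypothetical invocation (the dtype, sample rate, and channel count are arbitrary):

 def test_sphere_int16_stereo(self):
     # exercises the SPHERE load path via the helper defined above
     self.assert_sphere("int16", 16000, 2)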
Example #6
    def setUp(cls):
        cls.root_dir = cls.get_base_temp_dir()
        dataset_dir = os.path.join(cls.root_dir,
                                   speechcommands.FOLDER_IN_ARCHIVE,
                                   speechcommands.URL)
        os.makedirs(dataset_dir, exist_ok=True)
        sample_rate = 16000  # 16kHz sample rate
        seed = 0
        for label in LABELS:
            path = os.path.join(dataset_dir, label)
            os.makedirs(path, exist_ok=True)
            for j in range(2):
                # generate hash ID for speaker
                speaker = "{:08x}".format(j)

                for utterance in range(3):
                    filename = f"{speaker}{speechcommands.HASH_DIVIDER}{utterance}.wav"
                    file_path = os.path.join(path, filename)
                    seed += 1
                    data = get_whitenoise(
                        sample_rate=sample_rate,
                        duration=0.01,
                        n_channels=1,
                        dtype="int16",
                        seed=seed,
                    )
                    save_wav(file_path, data, sample_rate)
                    sample = (
                        normalize_wav(data),
                        sample_rate,
                        label,
                        speaker,
                        utterance,
                    )
                    cls.samples.append(sample)
Example #7
def _mock_dataset(root_dir, num_speaker):
    dirnames = ["mix"] + [f"s{i+1}" for i in range(num_speaker)]
    for dirname in dirnames:
        os.makedirs(os.path.join(root_dir, dirname), exist_ok=True)

    seed = 0
    sample_rate = 8000
    expected = []
    for filename in _FILENAMES:
        mix = None
        src = []
        for dirname in dirnames:
            waveform = get_whitenoise(
                sample_rate=8000, duration=1, n_channels=1, dtype="int16", seed=seed
            )
            seed += 1

            path = os.path.join(root_dir, dirname, filename)
            save_wav(path, waveform, sample_rate)
            waveform = normalize_wav(waveform)

            if dirname == "mix":
                mix = waveform
            else:
                src.append(waveform)
        expected.append((sample_rate, mix, src))
    return expected
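The expected list pairs each mixture with its per-speaker sources. A hypothetical comparison helper is sketched below; the dataset class itself is not shown in this snippet, so the (sample_rate, mix, [sources]) item layout is an assumption carried over from the expected tuples, and the tolerances mirror Example #5.

import torch

def check_separation_dataset(dataset, expected):
    # `dataset` is assumed to yield (sample_rate, mix, [src_1, ..., src_N]) tuples
    # in the same order as the tuples returned by _mock_dataset above.
    assert len(dataset) == len(expected)
    for (sample_rate, mix, sources), (exp_sr, exp_mix, exp_sources) in zip(dataset, expected):
        assert sample_rate == exp_sr
        torch.testing.assert_close(mix, exp_mix, atol=1e-4, rtol=1e-8)
        for src, exp_src in zip(sources, exp_sources):
            torch.testing.assert_close(src, exp_src, atol=1e-4, rtol=1e-8)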
Example #8
 def setUpClass(cls):
     cls.root_dir = cls.get_base_temp_dir()
     sample_rate = 22050
     seed = 0
     for genre in gtzan.gtzan_genres:
         base_dir = os.path.join(cls.root_dir, 'genres', genre)
         os.makedirs(base_dir, exist_ok=True)
         for i in range(100):
             filename = f'{genre}.{i:05d}'
             path = os.path.join(base_dir, f'{filename}.wav')
             data = get_whitenoise(sample_rate=sample_rate,
                                   duration=0.01,
                                   n_channels=1,
                                   dtype='int16',
                                   seed=seed)
             save_wav(path, data, sample_rate)
             sample = (normalize_wav(data), sample_rate, genre)
             cls.samples.append(sample)
             if filename in gtzan.filtered_test:
                 cls.testing.append(sample)
             if filename in gtzan.filtered_train:
                 cls.training.append(sample)
             if filename in gtzan.filtered_valid:
                 cls.validation.append(sample)
             seed += 1
Example #9
def get_mock_dataset(root_dir, train_csv_contents,
                     ext_audio) -> Tuple[Tensor, int, Dict[str, str]]:
    """
    prepares the mocked dataset
    """
    mocked_data = []
    # Note: the extension is changed to wav for the sake of the test.
    # Note: the first content is missing values for `age`, `gender` and `accent`, as in the original data.
    # A different TSV file name does not imply a different subset; the data is tested as a whole dataset here.
    tsv_filename = os.path.join(root_dir, "train.tsv")
    audio_base_path = os.path.join(root_dir, "clips")
    os.makedirs(audio_base_path, exist_ok=True)
    with open(tsv_filename, "w", newline='') as tsv:
        writer = csv.writer(tsv, delimiter='\t')
        writer.writerow(_HEADERS)
        for i, content in enumerate(train_csv_contents):
            content[2] = str(content[2].encode("utf-8"))
            writer.writerow(content)
            if not content[1].endswith(ext_audio):
                audio_path = os.path.join(audio_base_path,
                                          content[1] + ext_audio)
            else:
                audio_path = os.path.join(audio_base_path, content[1])

            data = get_whitenoise(sample_rate=_SAMPLE_RATE,
                                  duration=1,
                                  n_channels=1,
                                  seed=i,
                                  dtype='float32')
            save_wav(audio_path, data, _SAMPLE_RATE)
            # Append data entry
            mocked_data.append((normalize_wav(data), _SAMPLE_RATE,
                                dict(zip(_HEADERS, content))))
    return mocked_data
Example #10
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        # The path convention commonvoice uses
        base_dir = os.path.join(cls.root_dir, commonvoice.FOLDER_IN_ARCHIVE,
                                commonvoice.VERSION, "en")
        os.makedirs(base_dir, exist_ok=True)

        # A different TSV file name does not imply a different subset; the data is tested as a whole dataset here.
        tsv_filename = os.path.join(base_dir, commonvoice.TSV)
        with open(tsv_filename, "w", newline='') as tsv:
            writer = csv.writer(tsv, delimiter='\t')
            writer.writerow(cls._headers)
            for i, content in enumerate(cls._train_csv_contents):
                audio_filename = content[1]
                writer.writerow(content)

                # Generate and store audio
                audio_base_path = os.path.join(base_dir, cls._folder_audio)
                os.makedirs(audio_base_path, exist_ok=True)
                audio_path = os.path.join(audio_base_path, audio_filename)
                data = get_whitenoise(sample_rate=cls.sample_rate,
                                      duration=1,
                                      n_channels=1,
                                      seed=i,
                                      dtype='float32')
                save_wav(audio_path, data, cls.sample_rate)

                # Append data entry
                cls.data.append((normalize_wav(data), cls.sample_rate,
                                 dict(zip(cls._headers, content))))
Example #11
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        sample_rate = 16000
        utterance = "This is a test utterance."

        base_dir = os.path.join(cls.root_dir, "ARCTIC", "cmu_us_aew_arctic")
        txt_dir = os.path.join(base_dir, "etc")
        os.makedirs(txt_dir, exist_ok=True)
        txt_file = os.path.join(txt_dir, "txt.done.data")
        audio_dir = os.path.join(base_dir, "wav")
        os.makedirs(audio_dir, exist_ok=True)

        seed = 42
        with open(txt_file, "w") as txt:
            for c in ["a", "b"]:
                for i in range(5):
                    utterance_id = f"arctic_{c}{i:04d}"
                    path = os.path.join(audio_dir, f"{utterance_id}.wav")
                    data = get_whitenoise(
                        sample_rate=sample_rate,
                        duration=3,
                        n_channels=1,
                        dtype="int16",
                        seed=seed,
                    )
                    save_wav(path, data, sample_rate)
                    sample = (
                        normalize_wav(data),
                        sample_rate,
                        utterance,
                        utterance_id.split("_")[1],
                    )
                    cls.samples.append(sample)
                    txt.write(f'( {utterance_id} "{utterance}" )\n')
                    seed += 1
Example #12
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        base_dir = os.path.join(cls.root_dir, 'LibriTTS', 'train-clean-100')
        for i, utterance_id in enumerate(cls.utterance_ids):
            filename = f'{"_".join(str(u) for u in utterance_id)}.wav'
            file_dir = os.path.join(base_dir, str(utterance_id[0]),
                                    str(utterance_id[1]))
            os.makedirs(file_dir, exist_ok=True)
            path = os.path.join(file_dir, filename)

            data = get_whitenoise(sample_rate=24000,
                                  duration=2,
                                  n_channels=1,
                                  dtype='int16',
                                  seed=i)
            save_wav(path, data, 24000)
            cls.data.append(normalize_wav(data))

            original_text_filename = f'{"_".join(str(u) for u in utterance_id)}.original.txt'
            path_original = os.path.join(file_dir, original_text_filename)
            with open(path_original, 'w') as file_:
                file_.write(cls.original_text)

            normalized_text_filename = f'{"_".join(str(u) for u in utterance_id)}.normalized.txt'
            path_normalized = os.path.join(file_dir, normalized_text_filename)
            with open(path_normalized, 'w') as file_:
                file_.write(cls.normalized_text)
Example #13
def get_mock_dataset(root_dir):
    """
    root_dir: directory of the mocked dataset
    """
    mocked_data = []
    dataset_dir = os.path.join(
        root_dir, librispeech.FOLDER_IN_ARCHIVE, librispeech.URL
    )
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz
    seed = 0

    for speaker_id in range(5):
        speaker_path = os.path.join(dataset_dir, str(speaker_id))
        os.makedirs(speaker_path, exist_ok=True)

        for chapter_id in range(3):
            chapter_path = os.path.join(speaker_path, str(chapter_id))
            os.makedirs(chapter_path, exist_ok=True)
            trans_content = []

            for utterance_id in range(10):
                filename = f'{speaker_id}-{chapter_id}-{utterance_id:04d}.wav'
                path = os.path.join(chapter_path, filename)

                transcript = ' '.join(
                    [_NUMBERS[x] for x in [speaker_id, chapter_id, utterance_id]]
                )
                trans_content.append(
                    f'{speaker_id}-{chapter_id}-{utterance_id:04d} {transcript}'
                )

                data = get_whitenoise(
                    sample_rate=sample_rate,
                    duration=0.01,
                    n_channels=1,
                    dtype='float32',
                    seed=seed
                )
                save_wav(path, data, sample_rate)
                sample = (
                    normalize_wav(data),
                    sample_rate,
                    transcript,
                    speaker_id,
                    chapter_id,
                    utterance_id
                )
                mocked_data.append(sample)

                seed += 1

            trans_filename = f'{speaker_id}-{chapter_id}.trans.txt'
            trans_path = os.path.join(chapter_path, trans_filename)
            with open(trans_path, 'w') as f:
                f.write('\n'.join(trans_content))
    return mocked_data
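A sketch of how the LibriSpeech mock might be consumed. Because the mock writes .wav files while the loader defaults to .flac, the extension has to be redirected; the _ext_audio class attribute used below is an assumption about a torchaudio-internal detail and may differ between versions.

import torch
from torchaudio.datasets import librispeech

def check_librispeech(root_dir):
    expected = get_mock_dataset(root_dir)
    # Assumption: override the private audio-extension attribute so the loader
    # picks up the .wav files written by the mock; restore it afterwards.
    librispeech.LIBRISPEECH._ext_audio = ".wav"
    try:
        dataset = librispeech.LIBRISPEECH(root=root_dir)
        assert len(dataset) == len(expected)
        for sample, exp in zip(dataset, expected):
            waveform, sample_rate, transcript, speaker_id, chapter_id, utterance_id = sample
            torch.testing.assert_close(waveform, exp[0], atol=1e-4, rtol=1e-8)
            assert (sample_rate, transcript, speaker_id, chapter_id, utterance_id) == exp[1:]
    finally:
        librispeech.LIBRISPEECH._ext_audio = ".flac"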
Example #14
def get_mock_dataset(dataset_dir):
    """
    dataset_dir: directory of the mocked dataset
    """
    mocked_samples = []
    mocked_train_samples = []
    mocked_valid_samples = []
    mocked_test_samples = []
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 16000  # 16kHz sample rate
    seed = 0
    valid_file = os.path.join(dataset_dir, "validation_list.txt")
    test_file = os.path.join(dataset_dir, "testing_list.txt")
    with open(valid_file, "w") as valid, open(test_file, "w") as test:
        for label in _LABELS:
            path = os.path.join(dataset_dir, label)
            os.makedirs(path, exist_ok=True)
            for j in range(6):
                # generate hash ID for speaker
                speaker = "{:08x}".format(j)

                for utterance in range(3):
                    filename = f"{speaker}{speechcommands.HASH_DIVIDER}{utterance}.wav"
                    file_path = os.path.join(path, filename)
                    seed += 1
                    data = get_whitenoise(
                        sample_rate=sample_rate,
                        duration=0.01,
                        n_channels=1,
                        dtype="int16",
                        seed=seed,
                    )
                    save_wav(file_path, data, sample_rate)
                    sample = (
                        normalize_wav(data),
                        sample_rate,
                        label,
                        speaker,
                        utterance,
                    )
                    mocked_samples.append(sample)
                    if j < 2:
                        mocked_train_samples.append(sample)
                    elif j < 4:
                        valid.write(f'{label}/{filename}\n')
                        mocked_valid_samples.append(sample)
                    elif j < 6:
                        test.write(f'{label}/{filename}\n')
                        mocked_test_samples.append(sample)
    return mocked_samples, mocked_train_samples, mocked_valid_samples, mocked_test_samples
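A minimal sketch of checking the subset splits encoded by validation_list.txt and testing_list.txt, assuming torchaudio.datasets.SPEECHCOMMANDS in a version that supports the subset argument and the default "SpeechCommands"/"speech_commands_v0.02" folder layout; only lengths are compared to stay independent of iteration order.

import os
from torchaudio.datasets import SPEECHCOMMANDS

def check_speechcommands_subsets(root_dir):
    # Assumed default folder layout: <root>/SpeechCommands/speech_commands_v0.02
    dataset_dir = os.path.join(root_dir, "SpeechCommands", "speech_commands_v0.02")
    samples, train_samples, valid_samples, test_samples = get_mock_dataset(dataset_dir)
    for subset, expected in [(None, samples), ("training", train_samples),
                             ("validation", valid_samples), ("testing", test_samples)]:
        dataset = SPEECHCOMMANDS(root=root_dir, subset=subset)
        assert len(dataset) == len(expected)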
Example #15
 def setUpClass(cls):
     cls.root_dir = cls.get_base_temp_dir()
     base_dir = os.path.join(cls.root_dir, 'waves_yesno')
     os.makedirs(base_dir, exist_ok=True)
     for i, label in enumerate(cls.labels):
         filename = f'{"_".join(str(l) for l in label)}.wav'
         path = os.path.join(base_dir, filename)
         data = get_whitenoise(sample_rate=8000,
                               duration=6,
                               n_channels=1,
                               dtype='int16',
                               seed=i)
         save_wav(path, data, 8000)
         cls.data.append(normalize_wav(data))
Example #16
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        dataset_dir = os.path.join(cls.root_dir,
                                   speechcommands.FOLDER_IN_ARCHIVE,
                                   speechcommands.URL)
        os.makedirs(dataset_dir, exist_ok=True)
        sample_rate = 16000  # 16kHz sample rate
        seed = 0
        valid_file = os.path.join(dataset_dir, "validation_list.txt")
        test_file = os.path.join(dataset_dir, "testing_list.txt")
        with open(valid_file, "w") as valid, open(test_file, "w") as test:
            for label in LABELS:
                path = os.path.join(dataset_dir, label)
                os.makedirs(path, exist_ok=True)
                for j in range(6):
                    # generate hash ID for speaker
                    speaker = "{:08x}".format(j)

                    for utterance in range(3):
                        filename = f"{speaker}{speechcommands.HASH_DIVIDER}{utterance}.wav"
                        file_path = os.path.join(path, filename)
                        seed += 1
                        data = get_whitenoise(
                            sample_rate=sample_rate,
                            duration=0.01,
                            n_channels=1,
                            dtype="int16",
                            seed=seed,
                        )
                        save_wav(file_path, data, sample_rate)
                        sample = (
                            normalize_wav(data),
                            sample_rate,
                            label,
                            speaker,
                            utterance,
                        )
                        cls.samples.append(sample)
                        if j < 2:
                            cls.train_samples.append(sample)
                        elif j < 4:
                            valid.write(f'{label}/{filename}\n')
                            cls.valid_samples.append(sample)
                        elif j < 6:
                            test.write(f'{label}/{filename}\n')
                            cls.test_samples.append(sample)
Example #17
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        dataset_dir = os.path.join(cls.root_dir, librispeech.FOLDER_IN_ARCHIVE,
                                   librispeech.URL)
        os.makedirs(dataset_dir, exist_ok=True)
        sample_rate = 16000  # 16kHz
        seed = 0

        for speaker_id in range(5):
            speaker_path = os.path.join(dataset_dir, str(speaker_id))
            os.makedirs(speaker_path, exist_ok=True)

            for chapter_id in range(3):
                chapter_path = os.path.join(speaker_path, str(chapter_id))
                os.makedirs(chapter_path, exist_ok=True)
                trans_content = []

                for utterance_id in range(10):
                    filename = f'{speaker_id}-{chapter_id}-{utterance_id:04d}.wav'
                    path = os.path.join(chapter_path, filename)

                    utterance = ' '.join([
                        NUMBERS[x]
                        for x in [speaker_id, chapter_id, utterance_id]
                    ])
                    trans_content.append(
                        f'{speaker_id}-{chapter_id}-{utterance_id:04d} {utterance}'
                    )

                    data = get_whitenoise(sample_rate=sample_rate,
                                          duration=0.01,
                                          n_channels=1,
                                          dtype='float32',
                                          seed=seed)
                    save_wav(path, data, sample_rate)
                    sample = (normalize_wav(data), sample_rate, utterance,
                              speaker_id, chapter_id, utterance_id)
                    cls.samples.append(sample)

                    seed += 1

                trans_filename = f'{speaker_id}-{chapter_id}.trans.txt'
                trans_path = os.path.join(chapter_path, trans_filename)
                with open(trans_path, 'w') as f:
                    f.write('\n'.join(trans_content))
Example #18
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        # A different TSV file name does not imply a different subset; the data is tested as a whole dataset here.
        tsv_filename = os.path.join(cls.root_dir, "train.tsv")
        audio_base_path = os.path.join(cls.root_dir, "clips")
        os.makedirs(audio_base_path, exist_ok=True)
        with open(tsv_filename, "w", newline='') as tsv:
            writer = csv.writer(tsv, delimiter='\t')
            writer.writerow(cls._headers)
            for i, content in enumerate(cls._train_csv_contents):
                writer.writerow(content)

                # Generate and store audio
                audio_path = os.path.join(audio_base_path, content[1])
                data = get_whitenoise(sample_rate=cls.sample_rate, duration=1, n_channels=1, seed=i, dtype='float32')
                save_wav(audio_path, data, cls.sample_rate)

                # Append data entry
                cls.data.append((normalize_wav(data), cls.sample_rate, dict(zip(cls._headers, content))))
Example #19
def get_mock_dataset(root_dir):
    """
    root_dir: root directory of the mocked data
    """
    mocked_samples = []
    dataset_dir = os.path.join(root_dir, 'VCTK-Corpus-0.92')
    os.makedirs(dataset_dir, exist_ok=True)
    sample_rate = 48000
    seed = 0

    for speaker in range(225, 230):
        speaker_id = 'p' + str(speaker)
        audio_dir = os.path.join(dataset_dir, 'wav48_silence_trimmed',
                                 speaker_id)
        os.makedirs(audio_dir, exist_ok=True)

        file_dir = os.path.join(dataset_dir, 'txt', speaker_id)
        os.makedirs(file_dir, exist_ok=True)

        for utterance_id in range(1, 11):
            filename = f'{speaker_id}_{utterance_id:03d}_mic2'
            audio_file_path = os.path.join(audio_dir, filename + '.wav')

            data = get_whitenoise(sample_rate=sample_rate,
                                  duration=0.01,
                                  n_channels=1,
                                  dtype='float32',
                                  seed=seed)
            save_wav(audio_file_path, data, sample_rate)

            txt_file_path = os.path.join(file_dir, filename[:-5] + '.txt')
            transcript = _TRANSCRIPT[utterance_id - 1]
            with open(txt_file_path, 'w') as f:
                f.write(transcript)

            sample = (normalize_wav(data), sample_rate, transcript, speaker_id,
                      utterance_id)
            mocked_samples.append(sample)
            seed += 1
    return mocked_samples
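A minimal sketch for the VCTK mock, assuming torchaudio.datasets.VCTK_092 accepts an audio_ext override (the mock writes .wav instead of the usual .flac) and returns items in the same sorted order the mock creates them; the utterance_id field is skipped because its formatting (string vs. integer) may differ from the mock.

import torch
from torchaudio.datasets import VCTK_092

def check_vctk(root_dir):
    expected = get_mock_dataset(root_dir)
    # audio_ext is assumed to exist as a constructor argument; mic2 matches the mock filenames.
    dataset = VCTK_092(root=root_dir, mic_id='mic2', audio_ext='.wav')
    assert len(dataset) == len(expected)
    for sample, exp in zip(dataset, expected):
        waveform, sample_rate, transcript, speaker_id, _utterance_id = sample
        torch.testing.assert_close(waveform, exp[0], atol=1e-4, rtol=1e-8)
        assert (sample_rate, transcript, speaker_id) == (exp[1], exp[2], exp[3])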
Example #20
def get_mock_dataset(root_dir):
    """
    root_dir: directory of the mocked dataset
    """
    mocked_data = []
    sample_rate = 16000
    transcript = "This is a test transcript."

    base_dir = os.path.join(root_dir, "ARCTIC", "cmu_us_aew_arctic")
    txt_dir = os.path.join(base_dir, "etc")
    os.makedirs(txt_dir, exist_ok=True)
    txt_file = os.path.join(txt_dir, "txt.done.data")
    audio_dir = os.path.join(base_dir, "wav")
    os.makedirs(audio_dir, exist_ok=True)

    seed = 42
    with open(txt_file, "w") as txt:
        for c in ["a", "b"]:
            for i in range(5):
                utterance_id = f"arctic_{c}{i:04d}"
                path = os.path.join(audio_dir, f"{utterance_id}.wav")
                data = get_whitenoise(
                    sample_rate=sample_rate,
                    duration=3,
                    n_channels=1,
                    dtype="int16",
                    seed=seed,
                )
                save_wav(path, data, sample_rate)
                sample = (
                    normalize_wav(data),
                    sample_rate,
                    transcript,
                    utterance_id.split("_")[1],
                )
                mocked_data.append(sample)
                txt.write(f'( {utterance_id} "{transcript}" )\n')
                seed += 1
    return mocked_data
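A minimal sketch for the CMU ARCTIC mock, assuming torchaudio.datasets.CMUARCTIC with its default url (which maps to the cmu_us_aew_arctic folder created above) and an item layout mirroring the tuples built by the mock.

import torch
from torchaudio.datasets import CMUARCTIC

def check_cmuarctic(root_dir):
    expected = get_mock_dataset(root_dir)
    dataset = CMUARCTIC(root=root_dir)
    assert len(dataset) == len(expected)
    for sample, exp in zip(dataset, expected):
        waveform = sample[0]
        torch.testing.assert_close(waveform, exp[0], atol=1e-4, rtol=1e-8)
        # sample_rate, transcript and utterance id are assumed to mirror the mock's tuple layout
        assert sample[1:] == exp[1:]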
Example #21
    def setUpClass(cls):
        cls.root_dir = cls.get_base_temp_dir()
        dataset_dir = os.path.join(cls.root_dir, 'VCTK-Corpus-0.92')
        os.makedirs(dataset_dir, exist_ok=True)
        sample_rate = 48000
        seed = 0

        for speaker in range(225, 230):
            speaker_id = 'p' + str(speaker)
            audio_dir = os.path.join(dataset_dir, 'wav48_silence_trimmed',
                                     speaker_id)
            os.makedirs(audio_dir, exist_ok=True)

            file_dir = os.path.join(dataset_dir, 'txt', speaker_id)
            os.makedirs(file_dir, exist_ok=True)

            for utterance_id in range(1, 11):
                filename = f'{speaker_id}_{utterance_id:03d}_mic2'
                audio_file_path = os.path.join(audio_dir, filename + '.wav')

                data = get_whitenoise(sample_rate=sample_rate,
                                      duration=0.01,
                                      n_channels=1,
                                      dtype='float32',
                                      seed=seed)
                save_wav(audio_file_path, data, sample_rate)

                txt_file_path = os.path.join(file_dir, filename[:-5] + '.txt')
                utterance = UTTERANCE[utterance_id - 1]
                with open(txt_file_path, 'w') as f:
                    f.write(utterance)

                sample = (normalize_wav(data), sample_rate, utterance,
                          speaker_id, utterance_id)
                cls.samples.append(sample)

                seed += 1