コード例 #1
0
def compute_spect(wav):
    """Turn a waveform into a rescaled mel-spectrogram tensor on ``device``.

    The transposed output of ``pySTFT`` is projected through ``mel_basis``,
    converted to decibels (floored at ``min_level``, then offset by 16) and
    rescaled with ``(dB + 100) / 100``.  A leading axis is prepended, and
    when the second axis is at most 192 the array is handed to
    ``pad_seq_to_2`` with a target of 192.

    Args:
        wav: Waveform accepted by ``pySTFT`` (presumably a 1-D float array;
            confirm against ``pySTFT``).

    Returns:
        A float32 torch tensor moved to the module-level ``device``.
    """
    target_len = 192

    stft_frames = pySTFT(wav).T
    mel = np.dot(stft_frames, mel_basis)
    mel_db = 20 * np.log10(np.maximum(min_level, mel)) - 16

    # Affine rescale of the dB values, then prepend a leading axis.
    spect = ((mel_db + 100) / 100)[np.newaxis, :, :]

    # Only inputs no longer than the target are padded; longer ones pass
    # through unchanged.
    if spect.shape[1] <= target_len:
        spect, _unused_pad = pad_seq_to_2(spect, target_len)

    as_float32 = spect.astype(np.float32)
    return torch.from_numpy(as_float32).to(device)
コード例 #2
0
def _processing_data(hparams, full_path, spk_label, spk_emb, gender, npz_name,
                     pbar, i):
    if gender == 'M':
        lo, hi = 50, 250
    elif gender == 'F':
        lo, hi = 100, 600
    else:
        raise ValueError

    prng = RandomState(int(random.random()))
    x, fs = librosa.load(full_path, sr=hparams.sample_rate)
    assert fs == hparams.sample_rate
    if x.shape[0] % hparams.hop_size == 0:
        x = np.concatenate((x, np.array([1e-06])), axis=0)
    y = signal.filtfilt(b, a, x)
    wav = y * 0.96 + (prng.rand(y.shape[0]) - 0.5) * 1e-06

    # compute spectrogram
    D = pySTFT(wav).T
    D_mel = np.dot(D, mel_basis)
    D_db = 20 * np.log10(np.maximum(min_level, D_mel)) - hparams.ref_level_db
    S = (D_db + 100) / 100

    # extract f0
    f0_rapt = sptk.rapt(wav.astype(np.float32) * 32768,
                        fs,
                        hparams.hop_size,
                        min=lo,
                        max=hi,
                        otype=2)
    index_nonzero = (f0_rapt != -1e10)
    mean_f0, std_f0 = np.mean(f0_rapt[index_nonzero]), np.std(
        f0_rapt[index_nonzero])
    f0_norm = speaker_normalization(f0_rapt, index_nonzero, mean_f0, std_f0)

    assert len(S) == len(f0_rapt)

    data = {
        'mel': S.astype(np.float32),
        'f0': f0_norm.astype(np.float32),
        'spk_label': spk_label
    }
    if spk_emb is not None:
        data['spk_emb'] = spk_emb

    np.savez(npz_name, **data)
    pbar.update(i)
コード例 #3
0
            try:
                x, fs = sf.read(wav_path)
            except Exception as e:
                # print("Error on {}".format(basename))
                print(e) if 'System error' not in str(
                    e
                ) else None  # preprocessed dir can have no wav file due to the lenght constraint.
                continue
            # assert fs == 16000
            if x.shape[0] % 256 == 0:
                x = np.concatenate((x, np.array([1e-06])), axis=0)
            y = signal.filtfilt(b, a, x)
            wav = y * 0.96 + (prng.rand(y.shape[0]) - 0.5) * 1e-06

            # compute spectrogram
            D = pySTFT(wav).T
            D_mel = np.dot(D, mel_basis)
            D_db = 20 * np.log10(np.maximum(min_level, D_mel)) - 16
            S = (D_db + 100) / 100

            # extract f0
            f0_rapt = sptk.rapt(wav.astype(np.float32) * 32768,
                                fs,
                                256,
                                min=lo,
                                max=hi,
                                otype=2)
            index_nonzero = (f0_rapt != -1e10)
            mean_f0, std_f0 = np.mean(f0_rapt[index_nonzero]), np.std(
                f0_rapt[index_nonzero])
            f0_norm = speaker_normalization(f0_rapt, index_nonzero, mean_f0,