Python MLSADF Examples, pysptk.synthesis.MLSADF Python Examples

Example #1

0

Show file

def spec_to_waveform(spectrogram, order, fs, frame_period):
    """Filter a signal through an MLSA filter derived from ``spectrogram``.

    The spectrogram is converted to mel-cepstrum with ``pysptk.sp2mc``, the
    mel-cepstrum to MLSA filter coefficients with ``pysptk.mc2b``, and the
    coefficients are handed to a ``Synthesizer`` wrapping an ``MLSADF``.

    Parameters
    ----------
    spectrogram :
        Input passed to ``pysptk.sp2mc``.
    order : int
        Mel-cepstrum order, also used as the MLSA filter order.
    fs :
        Sampling rate; used to pick the warping coefficient and hop size.
    frame_period :
        Frame shift; multiplied by 0.001 before scaling by ``fs``, so it is
        presumably expressed in milliseconds.

    Returns
    -------
    The value returned by ``Synthesizer.synthesis``.

    NOTE(review): the signal ``x`` fed to the filter is not a parameter of
    this function; it is looked up in the enclosing/global scope and a
    ``NameError`` is raised if no such name exists. Confirm with callers.
    """
    warp = pysptk.util.mcepalpha(fs)
    samples_per_frame = int(fs * (frame_period * 0.001))

    mel_cepstrum = pysptk.sp2mc(spectrogram, order=order, alpha=warp)

    mlsa_filter = MLSADF(order=order, alpha=warp)
    synthesizer = Synthesizer(mlsa_filter, hopsize=samples_per_frame)

    filter_coef = pysptk.mc2b(mel_cepstrum.astype(np.float64), alpha=warp)
    return synthesizer.synthesis(x, filter_coef)

Example #2

0

Show file

def test_vc_from_path(model, path, data_mean, data_std, diffvc=True):
    """Run voice conversion on the wav file at ``path`` and synthesize it.

    The file is analysed with WORLD (``pyworld``), its spectral envelope is
    converted to mel-cepstrum, smoothed, extended with delta features,
    normalized with ``data_mean``/``data_std`` and passed through ``model``.
    The predicted static features are denormalized and turned back into a
    waveform.

    Parameters
    ----------
    model :
        Callable as ``model(features, R)`` returning ``(y_hat, y_hat_static)``.
    path :
        Path of the wav file to convert.
    data_mean, data_std :
        Normalization statistics; their first ``static_dim`` entries are
        used to denormalize the static prediction.
    diffvc : bool
        If true, the difference between predicted and input static
        features is applied to the input waveform with an MLSA filter.
        Otherwise the predicted mel-cepstrum is converted to a spectrum
        and synthesized with ``pyworld.synthesize``.

    Returns
    -------
    tuple
        ``(waveform, inputs, outputs)`` where ``inputs`` is a copy of the
        static part of the model input features (before normalization) and
        ``outputs`` is a copy of the denormalized static prediction.
    """
    model.eval()

    fs, x = wavfile.read(path)
    x = x.astype(np.float64)
    # Frame shift in samples, derived from this file's own sampling rate so
    # the function does not depend on a `hop_length` defined elsewhere.
    hop_length = int(fs * (hp.frame_period * 0.001))
    f0, timeaxis = pyworld.dio(x, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)
    alpha = pysptk.util.mcepalpha(fs)
    mc = pysptk.sp2mc(spectrogram, order=hp.order, alpha=alpha)
    # Split off the 0th (power) coefficient; only the rest is converted.
    c0, mc = mc[:, 0], mc[:, 1:]
    static_dim = mc.shape[-1]
    mc = P.modspec_smoothing(mc, fs / hop_length, cutoff=50)
    mc = P.delta_features(mc, hp.windows).astype(np.float32)

    T = mc.shape[0]

    inputs = mc[:, :static_dim].copy()

    # Normalization
    mc_scaled = P.scale(mc, data_mean, data_std)

    # Apply model
    mc_scaled = Variable(torch.from_numpy(mc_scaled))
    R = unit_variance_mlpg_matrix(hp.windows, T)
    R = torch.from_numpy(R)
    y_hat, y_hat_static = model(mc_scaled, R)
    mc_static_pred = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    mc_static_pred = P.inv_scale(mc_static_pred, data_mean[:static_dim],
                                 data_std[:static_dim])

    outputs = mc_static_pred.copy()

    if diffvc:
        # Differential features: prediction minus the (smoothed) input.
        mc_static_pred = mc_static_pred - mc[:, :static_dim]

    mc = np.hstack((c0[:, None], mc_static_pred))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      hp.frame_period)

    return waveform, inputs, outputs

Example #3

0

Show file

File: AudioSpectrogram.py Project: wuweijia1994/SmartSpeaker

    def pysptk_imfcc(self):
        """Synthesize a signal from ``self.mc`` with an MLSA filter and plot it.

        Reads ``self.mc``, ``self.alpha``, ``self.order``, ``self.hop_length``,
        ``self.source_excitation`` and ``self.sr``. The synthesized signal is
        only displayed via ``librosa.display.waveplot``; nothing is returned.
        """
        from pysptk.synthesis import MLSADF, Synthesizer

        # Convert mel-cepstrum to MLSADF coefficients
        b = pysptk.mc2b(self.mc, self.alpha)

        synthesizer = Synthesizer(MLSADF(order=self.order, alpha=self.alpha),
                                  self.hop_length)

        # Drive the filter with the stored excitation signal
        x_synthesized = synthesizer.synthesis(self.source_excitation, b)

        librosa.display.waveplot(x_synthesized, sr=self.sr)

Example #4

0

Show file

 def synthesize(self, pitch, mc, unnormalize=False):
     """Synthesize a waveform from a pitch track and mel-cepstrum frames.

     Parameters
     ----------
     pitch :
         Per-frame pitch values passed to ``sptk.excite``.
     mc :
         Per-frame mel-cepstrum, indexable as ``mc[frame][coef]``.
     unnormalize : bool
         If true and ``self.mean`` is set, the first ``self.num_params + 1``
         coefficients of the first ``len(pitch)`` frames are mapped back
         with ``value * self.stdev + self.mean`` before synthesis.

     Returns
     -------
     The value returned by ``Synthesizer.synthesis``.

     Unlike an element-wise in-place loop, un-normalization works on a
     copy, so the caller's ``mc`` is left untouched.
     """
     mc = np.asarray(mc, dtype=np.float64)
     pitch = np.asarray(pitch, dtype=np.float64)

     # `is not None` rather than `!= None`: the latter compares
     # element-wise when mean is an array and has no single truth value.
     if unnormalize and self.mean is not None:
         n_frames = len(pitch)
         n_coef = self.num_params + 1
         mean = np.asarray(self.mean, dtype=np.float64)[:n_coef]
         stdev = np.asarray(self.stdev, dtype=np.float64)[:n_coef]
         mc = mc.copy()
         mc[:n_frames, :n_coef] = mc[:n_frames, :n_coef] * stdev + mean

     # Hop size in samples must be an integer; `/` yields a float on
     # Python 3 even when both operands are ints.
     hop_size = int(self.frame_len * self.sample_rate / 1000)

     b = sptk.mc2b(mc, self.alpha)
     synthesizer = Synthesizer(
         MLSADF(order=self.num_params, alpha=self.alpha), hop_size)
     source_excitation = sptk.excite(pitch, hop_size)
     x_synthesized = synthesizer.synthesis(source_excitation, b)
     return x_synthesized

Example #5

0

Show file

File: noise_shaping.py Project: patrickltobing/shallow-wavenet

def synthesis_diff(x,
                   diffmcep,
                   rmcep=None,
                   alpha=MCEP_ALPHA,
                   fs=FS,
                   shiftms=SHIFTMS):
    """Filter a waveform with a differential mel-cepstrum.

    Parameters
    ----------
    x : array, shape (`samples`)
        Waveform sequence to be filtered.
    diffmcep : array, shape (`T`, `dim`)
        Differential mel-cepstrum sequence.
    rmcep : array, shape (`T`, `dim`), optional
        Reference mel-cepstrum sequence. When given, ``diffmcep`` is
        replaced by ``mod_power(rmcep + diffmcep, rmcep) - rmcep``
        (power modification) before filtering. Defaults to None.
    alpha : float, optional
        Parameter of the all-pass transfer function.
        Defaults to the module constant ``MCEP_ALPHA``.
    fs : optional
        Sampling rate used to derive the frame shift in samples.
        Defaults to the module constant ``FS``.
    shiftms : optional
        Frame shift; combined as ``fs / 1000 * shiftms``, so presumably in
        milliseconds. Defaults to the module constant ``SHIFTMS``.

    Returns
    -------
    wav : array, shape (`samples`)
        Synthesized waveform.
    """
    signal = x.astype(np.float64)
    filter_order = diffmcep.shape[1] - 1
    shift_samples = int(fs / 1000 * shiftms)

    if rmcep is not None:
        # power modification
        converted = mod_power(rmcep + diffmcep, rmcep, alpha=alpha)
        diffmcep = converted - rmcep

    # Convert every frame (row) to MLSA filter coefficients.
    coef = np.apply_along_axis(ps.mc2b, 1, diffmcep, alpha)
    assert np.isfinite(coef).all()

    synthesizer = ps.synthesis.Synthesizer(MLSADF(filter_order, alpha=alpha),
                                           shift_samples)
    return synthesizer.synthesis(signal, coef)

Example #6

0

Show file

def test_one_utt(src_path, tgt_path, disable_mlpg=False, diffvc=True):
    """Convert one source utterance with the GMM and return the waveform.

    Relies on names defined outside this function: ``gmm``, ``windows``,
    ``frame_period``, ``order``, ``alpha``, ``use_delta``, ``static_dim``,
    ``hop_length`` and ``fftlen``.

    Parameters
    ----------
    src_path :
        Path of the source wav file.
    tgt_path :
        Not used by this function.
    disable_mlpg : bool
        If true, parameter generation uses a single static window.
    diffvc : bool
        If true, the converted features are applied to the source waveform
        with an MLSA filter; otherwise the waveform is re-synthesized with
        ``pyworld.synthesize``.

    Returns
    -------
    The synthesized waveform.
    """
    # GMM-based parameter generation is provided by the library in `baseline` module
    if disable_mlpg:
        # Force disable MLPG
        paramgen = MLPG(gmm, windows=[(0, 0, np.array([1.0]))], diff=diffvc)
    else:
        paramgen = MLPG(gmm, windows=windows, diff=diffvc)

    fs, x = wavfile.read(src_path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    f0 = pyworld.stonemask(x, f0, timeaxis, fs)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    # Split off the 0th (power) coefficient; only the rest is converted.
    c0, mc = mc[:, 0], mc[:, 1:]
    if use_delta:
        mc = delta_features(mc, windows)
    mc = paramgen.transform(mc)
    if disable_mlpg and mc.shape[-1] != static_dim:
        # Without MLPG the output may still carry extra columns; keep static only.
        mc = mc[:, :static_dim]
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))
    if diffvc:
        mc[:, 0] = 0  # remove power coefficients
        engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                             hopsize=hop_length)
        b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
        waveform = engine.synthesis(x, b)
    else:
        spectrogram = pysptk.mc2sp(mc.astype(np.float64),
                                   alpha=alpha,
                                   fftlen=fftlen)
        waveform = pyworld.synthesize(f0, spectrogram, aperiodicity, fs,
                                      frame_period)

    return waveform

Example #7

0

Show file

File: mlsa.py Project: Iselix/kwiiyatta

def apply_mlsa_filter(wav, mcep):
    """Apply ``mcep`` to ``wav`` through an MLSA filter.

    If the two sampling rates differ, ``mcep`` is first brought to
    ``wav.fs``: downsampled with ``kwiiyatta.resample`` when it is higher,
    or re-extracted from a resampled spectrum envelope when it is lower.
    The 0th coefficient is zeroed before filtering.

    Returns a ``kwiiyatta.Wavdata`` at ``wav.fs`` holding the filtered
    signal.
    """
    if mcep.fs > wav.fs:
        mcep = kwiiyatta.resample(mcep, wav.fs)
    elif mcep.fs < wav.fs:
        envelope = kwiiyatta.Synthesizer.resample_spectrum_envelope(
            mcep.extract_spectrum(), mcep.fs, wav.fs)
        # Bins from `first_pad_bin` upward are overwritten with the value
        # of the bin just below it, repeated across the remaining columns.
        first_pad_bin = mcep.fs*envelope.shape[1]//wav.fs
        edge_column = np.atleast_2d(envelope[:, first_pad_bin-1]).T
        pad_width = envelope.shape[-1]-first_pad_bin
        envelope[:, first_pad_bin:] = np.tile(edge_column, pad_width)
        mcep = kwiiyatta.MelCepstrum(wav.fs, mcep.frame_period)
        mcep.extract(envelope)

    # remove power coefficients
    zero_power = np.zeros((mcep.data.shape[0], 1))
    mc = np.hstack((zero_power, mcep.data[:, 1:]))

    warp = mcep.alpha()
    mlsa_filter = MLSADF(order=mcep.order, alpha=warp)
    hop_samples = int(mcep.fs * (mcep.frame_period * 0.001))
    engine = Synthesizer(mlsa_filter, hopsize=hop_samples)

    filter_coef = pysptk.mc2b(mc.astype(np.float64), alpha=warp)
    filtered = engine.synthesis(wav.data, filter_coef)
    return kwiiyatta.Wavdata(wav.fs, filtered)

Example #8

0

Show file

File: gmm.py Project: Emanuele93/INMCA

def test_one_utt(path_src, path_tgt, disable_mlpg=False, diffvc=True):
    """Convert one source utterance with the GMM and return the waveform.

    Relies on names defined outside this function: ``gmm``, ``windows``,
    ``frame_period``, ``order``, ``alpha``, ``use_delta``, ``static_dim``,
    ``hop_length`` and ``fftlen``. ``path_tgt`` is not used here.

    With ``diffvc`` true the converted features drive an MLSA filter over
    the source waveform; otherwise the waveform is re-synthesized with
    ``pyworld.synthesize``.
    """
    # A single static window effectively switches MLPG off.
    if disable_mlpg:
        generation_windows = [(0, 0, np.array([1.0]))]
    else:
        generation_windows = windows
    paramgen = MLPG(gmm, windows=generation_windows, diff=diffvc)

    signal, fs_ = sf.read(path_src)
    signal = signal.astype(np.float64)

    # WORLD analysis
    f0, time_axis = pyworld.dio(signal, fs_, frame_period=frame_period)
    f0 = pyworld.stonemask(signal, f0, time_axis, fs_)
    envelope = pyworld.cheaptrick(signal, f0, time_axis, fs_)
    aperiodicity = pyworld.d4c(signal, f0, time_axis, fs_)

    full_mc = pysptk.sp2mc(envelope, order=order, alpha=alpha)
    power = full_mc[:, 0]
    features = full_mc[:, 1:]
    if use_delta:
        features = delta_features(features, windows)

    converted = paramgen.transform(features)
    if disable_mlpg and converted.shape[-1] != static_dim:
        converted = converted[:, :static_dim]
    assert converted.shape[-1] == static_dim

    out_mc = np.hstack((power[:, None], converted))

    if not diffvc:
        out_envelope = pysptk.mc2sp(out_mc.astype(np.float64),
                                    alpha=alpha,
                                    fftlen=fftlen)
        return pyworld.synthesize(f0, out_envelope, aperiodicity, fs_,
                                  frame_period)

    # Differential path: drop the power term and filter the source signal.
    out_mc[:, 0] = 0
    engine = Synthesizer(MLSADF(order=order, alpha=alpha),
                         hopsize=hop_length)
    filter_coef = pysptk.mc2b(out_mc.astype(np.float64), alpha=alpha)
    return engine.synthesis(signal, filter_coef)

Example #9

0

Show file

 def __test(order, alpha, pd):
     """Run the shared synthesis check on an MLSADF built with ``pd``."""
     mlsa_filter = MLSADF(order, alpha, pd=pd)
     __test_synthesis(mlsa_filter)

Example #10

0

Show file

File: convert_differential.py Project: yuu-trial/AI_Experiment

# Utterance names to convert, one per line of the list file.
with open("conf/eval.list", "r") as f:
    datalist = [line.rstrip() for line in f]

for name in datalist:
    outfile = "result/wav/{}_diff.wav".format(name)

    # Converted mel-generalized cepstrum, stored as raw little-endian float64.
    with open("data/SF-TF/mgc/{}.mgc".format(name), "rb") as f:
        conv_mgc = np.fromfile(f, dtype="<f8", sep="")
        conv_mgc = conv_mgc.reshape(len(conv_mgc) // dim, dim)

    # Source mel-generalized cepstrum in the same format.
    with open("data/SF/mgc/{}.mgc".format(name), "rb") as f:
        src_mgc = np.fromfile(f, dtype="<f8", sep="")
        src_mgc = src_mgc.reshape(len(src_mgc) // dim, dim)

    # Load the input speech itself; it is what gets filtered.
    fs, data = wavfile.read("data/SF/wav/{}.wav".format(name))
    # np.float was removed from NumPy; np.float64 is the same type.
    data = data.astype(np.float64)

    # Prepare the differential filter. It must not be overwritten with
    # zeros afterwards, or the filter would leave the spectrum unchanged.
    diff_mgc = conv_mgc - src_mgc

    # Apply the differential filter to the input waveform.
    b = np.apply_along_axis(sptk.mc2b, 1, diff_mgc, alpha)
    synthesizer = Synthesizer(MLSADF(order=dim - 1, alpha=alpha), 80)
    owav = synthesizer.synthesis(data, b)

    # Saturate to the int16 range so the cast below cannot wrap around.
    owav = np.clip(owav, -32768, 32767)
    wavfile.write(outfile, fs, owav.astype(np.int16))

Example #11

0

Show file

    # Pitch track of x, searched between 60 and 240 with one value per hop.
    pitch = pysptk.swipe(x.astype(np.float64),
                         fs=sr,
                         hopsize=hop_length,
                         min=60,
                         max=240,
                         otype="pitch")
    # Excitation signal generated from the pitch track.
    source_excitation = pysptk.excite(pitch, hop_length)

    # Mel-cepstrum analysis of the frames at the current order

    mc = pysptk.mcep(frames, order, alpha)
    # NOTE(review): logH is not used anywhere in the visible lines.
    logH = pysptk.mgc2sp(mc, alpha, 0.0, frame_length).real
    print(mc.shape)
    #plt.plot(mc)
    #plotname="x_syn_coefs_" + str(order) + ".png"
    #plt.savefig(plotname)

    # Convert mel-cepstrum to MLSADF coefficients
    b = pysptk.mc2b(mc, alpha)

    synthesizer = Synthesizer(MLSADF(order=order, alpha=alpha), hop_length)

    # Drive the MLSA filter with the excitation signal.
    x_synthesized = synthesizer.synthesis(source_excitation, b)

    # The output file name carries order + 1, not order.
    filenam = "synthesized_sounds/" + "x_syn" + str(order + 1) + ".wav"
    #wavfile.write("x.wav", sr, x)
    # NOTE(review): x_synthesized is written without an explicit dtype
    # conversion - confirm the resulting sample format is the intended one.
    wavfile.write(filenam, sr, x_synthesized)
    # Log "<order>,<elapsed seconds>" for this run; `start` and `f` come
    # from the enclosing code, which is not visible here.
    time_total = time.time() - start
    writestring = str(order) + "," + str(time_total) + "\n"
    f.write(writestring)

Example #12

0

Show file

File: test_synthesis.py Project: taroc/pysptk

 def __test_invalid_pade(pd):
     """Construct an order-20 MLSADF with the given ``pd`` value."""
     filter_order = 20
     MLSADF(filter_order, pd=pd)

Example #13

0

Show file

File: test_synthesis.py Project: taroc/pysptk

 def __test(order, alpha):
     """Run the shared synthesis check on ``MLSADF(order, alpha)``."""
     mlsa_filter = MLSADF(order, alpha)
     __test_synthesis(mlsa_filter)

Example #14

0

Show file

def test_vc_from_path(model, x, fs, data_mean, data_std, diffvc=True):
    """Run voice conversion on waveform ``x`` and synthesize the result.

    ``x`` is analysed with WORLD, its spectral envelope converted to
    mel-cepstrum, smoothed, extended with delta features, normalized and
    passed through ``model``. Static features are obtained either directly
    from the model (when it performs parameter generation itself) or via
    ``multi_stream_mlpg``.

    With ``diffvc`` true, the difference between predicted and input static
    features is applied to ``x`` with an MLSA filter; otherwise the
    predicted mel-cepstrum is synthesized with ``pyworld.synthesize``.

    Returns ``(waveform, inputs, outputs)``: the synthesized waveform, a
    copy of the static input features before normalization, and a copy of
    the denormalized static prediction.
    """
    model.eval()

    frame_shift = int(fs * (hp.frame_period * 0.001))
    signal = x.astype(np.float64)

    # WORLD analysis
    f0, timeaxis = pyworld.dio(signal, fs, frame_period=hp.frame_period)
    f0 = pyworld.stonemask(signal, f0, timeaxis, fs)
    envelope = pyworld.cheaptrick(signal, f0, timeaxis, fs)
    aperiodicity = pyworld.d4c(signal, f0, timeaxis, fs)

    # Mel-cepstrum features; the 0th coefficient is kept aside.
    alpha = pysptk.util.mcepalpha(fs)
    full_mc = pysptk.sp2mc(envelope, order=hp.order, alpha=alpha)
    c0 = full_mc[:, 0]
    features = full_mc[:, 1:]
    static_dim = features.shape[-1]
    features = P.modspec_smoothing(features, fs / frame_shift, cutoff=50)
    features = P.delta_features(features, hp.windows).astype(np.float32)

    num_frames = features.shape[0]
    inputs = features[:, :static_dim].copy()

    # Normalize, wrap for the model and add a batch axis.
    scaled = Variable(torch.from_numpy(P.scale(features, data_mean, data_std)))
    lengths = [len(scaled)]
    scaled = scaled.view(1, -1, scaled.size(-1))

    # MLPG matrix
    R = torch.from_numpy(unit_variance_mlpg_matrix(hp.windows, num_frames))

    if model.include_parameter_generation():
        # The model generates static parameters itself; multi-stream
        # features cannot be used in this case.
        _, y_hat_static = model(scaled, R, lengths=lengths)
    else:
        # Generic model (possibly a sequence model): run MLPG afterwards.
        assert hp.has_dynamic_features is not None
        y_hat = model(scaled, lengths=lengths)
        y_hat_static = multi_stream_mlpg(y_hat, R, hp.stream_sizes,
                                         hp.has_dynamic_features)

    predicted = y_hat_static.data.cpu().numpy().reshape(-1, static_dim)

    # Denormalize
    predicted = P.inv_scale(predicted, data_mean[:static_dim],
                            data_std[:static_dim])
    outputs = predicted.copy()

    if not diffvc:
        out_mc = np.hstack((c0[:, None], predicted))
        fftlen = pyworld.get_cheaptrick_fft_size(fs)
        out_envelope = pysptk.mc2sp(out_mc.astype(np.float64),
                                    alpha=alpha,
                                    fftlen=fftlen)
        waveform = pyworld.synthesize(f0, out_envelope, aperiodicity, fs,
                                      hp.frame_period)
        return waveform, inputs, outputs

    # Differential path: filter the input with (prediction - input).
    difference = predicted - features[:, :static_dim]
    out_mc = np.hstack((c0[:, None], difference))
    out_mc[:, 0] = 0  # remove power coefficients
    engine = Synthesizer(MLSADF(order=hp.order, alpha=alpha),
                         hopsize=frame_shift)
    filter_coef = pysptk.mc2b(out_mc.astype(np.float64), alpha=alpha)
    waveform = engine.synthesis(signal, filter_coef)

    return waveform, inputs, outputs

Example #15

0

Show file

# Parameter generation
paramgen = MLPG(gmm, windows=windows, diff=True)

# The output directory does not depend on the utterance, so create it once
# instead of checking on every iteration.
if not exists('resultsVC'):
    os.makedirs('resultsVC')

# Waveform generation for test set
for idx, path in enumerate(source.test_paths):
    fs, x = wavfile.read(path)
    x = x.astype(np.float64)
    f0, timeaxis = pyworld.dio(x, fs, frame_period=frame_period)
    spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
    # aperiodicity = pyworld.d4c(x, f0, timeaxis, fs)

    mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
    # Split off the 0th (power) coefficient; only the rest is converted.
    c0, mc = mc[:, 0], mc[:, 1:]

    mc = delta_features(mc, windows)
    since = time.time()
    mc = paramgen.transform(mc)
    print("{}, Elapsed time in conversion: {}s".format(idx,
                                                       time.time() - since))
    assert mc.shape[-1] == static_dim
    mc = np.hstack((c0[:, None], mc))

    mc[:, 0] = 0  # remove power coefficients
    engine = Synthesizer(MLSADF(order=order, alpha=alpha), hopsize=80)
    b = pysptk.mc2b(mc.astype(np.float64), alpha=alpha)
    waveform = engine.synthesis(x, b)

    # Saturate to the int16 range: a plain astype(np.int16) wraps around on
    # out-of-range samples instead of clipping them.
    waveform = np.clip(waveform, -32768, 32767)
    wavfile.write(
        "resultsVC/{}_{}.wav".format(splitext(basename(path))[0], 'mlpg'), fs,
        waveform.astype(np.int16))

Example #16

0

Show file

File: extract_features.py Project: karkirowle/DysarthriaCollaboration

 def build_synth(self):
     """Create the MLSA-filter synthesizer and store it on the instance.

     Uses ``self.order``, ``self.alpha`` and ``self.hop_length``; the
     result is assigned to ``self.synthesizer``.
     """
     mlsa_filter = MLSADF(order=self.order, alpha=self.alpha)
     self.synthesizer = Synthesizer(mlsa_filter, self.hop_length)

Example #17

0

Show file

# Speech analysis (fundamental frequency, spectral envelope, aperiodicity);
# only the spectral envelope is kept.
_, sp, _ = pyworld.wav2world(x, fs)

# Extract mel-cepstrum coefficients from the WORLD spectral envelope
mcep = pysptk.sp2mc(sp, order=ORDER, alpha=ALPHA)

# Pitch extraction
pitch = pysptk.swipe(x,
                     fs=fs,
                     hopsize=HOP_LENGTH,
                     min=MIN_F0,
                     max=MAX_F0,
                     otype="pitch")

# Generate the excitation signal (glottal source)
source_excitation = pysptk.excite(pitch, HOP_LENGTH)

# Convert mel-cepstrum coefficients to MLSA digital filter coefficients
mlsa_coef = pysptk.mc2b(mcep, ALPHA)

# Build the MLSA filter
synthesizer = Synthesizer(MLSADF(order=ORDER, alpha=ALPHA), HOP_LENGTH)

# Drive the MLSA filter with the excitation signal to synthesize speech
y = synthesizer.synthesis(source_excitation, mlsa_coef)

# Write the audio. Saturate to the int16 range first: a plain
# astype(np.int16) wraps around on out-of-range samples.
y = np.clip(y, -32768, 32767).astype(np.int16)
wavfile.write(OUT_WAVE_FILE, fs, y)

Example #18

0

Show file

File: pysptk_anasyn_mlsa_others.py Project: K-PTL/speech_process_exercise

                     hopsize=HOP_LENGTH,
                     min=MIN_F0,
                     max=MAX_F0,
                     otype="pitch")

# Generate the excitation signal (glottal source)
source_excitation = pysptk.excite(pitch, HOP_LENGTH)

# Mel-cepstrum analysis (i.e. spectral envelope extraction)
mc = pysptk.mcep(frames, ORDER, ALPHA)

# Convert mel-cepstrum coefficients to MLSA digital filter coefficients
mlsa_coef = pysptk.mc2b(mc, ALPHA)

# Build the MLSA filter
synthesizer = Synthesizer(MLSADF(order=ORDER, alpha=ALPHA), HOP_LENGTH)

# #### From here on, synthesize various voices by changing the synthesis
# #### filter parameters and the excitation

# ### Pitch shift (raise the pitch) ###
OUT_WAVE_FILE = "pitchshift_high.wav"
PITCH_SHIFT = 0.5  # use a factor smaller than 1 to raise the pitch
excitation_pitchhigh = pysptk.excite(pitch * PITCH_SHIFT, HOP_LENGTH)
y = synthesizer.synthesis(excitation_pitchhigh, mlsa_coef)  # synthesize speech
# Saturate to the int16 range first: a plain astype(np.int16) wraps around
# on out-of-range samples.
y = np.clip(y, -32768, 32767).astype(np.int16)
wavfile.write(OUT_WAVE_FILE, fs, y)

# ### Pitch shift (lower the pitch) ###
OUT_WAVE_FILE = "pitchshift_low.wav"
PITCH_SHIFT = 1.5  # use a factor larger than 1 to lower the pitch
excitation_pitchlow = pysptk.excite(pitch * PITCH_SHIFT, HOP_LENGTH)