def test_lpc2lsp():
    """Yield nose-style checks for pysptk.lpc2lsp."""
    # round-trip/transform sanity checks over several LPC orders
    for order in (15, 20, 25, 30):
        yield __test_transform_base, pysptk.lpc2lsp, order

    def __test_invalid_otype(dummy_lpc, otype):
        pysptk.lpc2lsp(dummy_lpc, otype=otype)

    np.random.seed(98765)
    dummy_lpc = pysptk.lpc(np.random.rand(512), 21)

    # otype 2/3 without a sampling frequency must raise ValueError
    yield raises(ValueError)(__test_invalid_otype), dummy_lpc, 2
    yield raises(ValueError)(__test_invalid_otype), dummy_lpc, 3

    # fs given in Hz (otype=2) and in kHz (otype=3) must yield the same LSPs
    lsp_hz = pysptk.lpc2lsp(dummy_lpc, otype=2, fs=16000)
    lsp_khz = pysptk.lpc2lsp(dummy_lpc, otype=3, fs=16)
    assert np.allclose(lsp_hz, lsp_khz)

    # with loggain=True the first coefficient holds log(gain)
    lsp_log = pysptk.lpc2lsp(dummy_lpc, otype=3, fs=16, loggain=True)
    assert lsp_log[0] == np.log(lsp_khz[0])
def __test_synthesis(filt):
    """Drive *filt* through the Synthesizer on dummy data; output must be finite."""
    hopsize = 80
    # dummy source excitation
    source = __dummy_source()
    # dummy filter coefficients: per-frame LPC converted to LSP
    windowed = __dummy_windowed_frames(source, frame_len=512, hopsize=hopsize)
    lsp = pysptk.lpc2lsp(pysptk.lpc(windowed, filt.order))
    # make sure lsp carries log gain in its first column
    lsp[:, 0] = np.log(lsp[:, 0])
    # synthesis
    y = Synthesizer(filt, hopsize).synthesis(source, lsp)
    assert np.all(np.isfinite(y))
def __test_invalid_otype(dummy_lpc, otype):
    """Invoke lpc2lsp with the given otype; wrapped with raises(ValueError) by callers."""
    pysptk.lpc2lsp(dummy_lpc, otype=otype)
hop_length=HOP_LENGTH).astype(np.float64).T
frames *= pysptk.blackman(FRAME_LENGTH)  # apply a Blackman window to each frame

# pitch extraction
pitch = pysptk.swipe(x, fs=fs, hopsize=HOP_LENGTH, min=MIN_F0, max=MAX_F0, otype="pitch")

# generate the excitation (glottal source) signal from the pitch contour
source_excitation = pysptk.excite(pitch, HOP_LENGTH)

# extract linear predictive coding (LPC) coefficients via LP analysis
lpc = pysptk.lpc(frames, ORDER)
# take log of the gain term; presumably LSPDF expects log gain in the
# first coefficient and lpc2lsp carries it through — TODO confirm
lpc[:, 0] = np.log(lpc[:, 0])

# convert the LPC coefficients to line spectral pairs (LSP)
lsp = pysptk.lpc2lsp(lpc, otype=0, fs=fs)

# build the all-pole (LSP digital) synthesis filter
synthesizer = Synthesizer(LSPDF(order=ORDER), HOP_LENGTH)

# synthesize speech by driving the filter with the excitation signal
y = synthesizer.synthesis(source_excitation, lsp)

# write out the waveform as 16-bit PCM
y = y.astype(np.int16)
wavfile.write(OUT_WAVE_FILE, fs, y)
def __dummy_windowed_frames(source, frame_len=512, hopsize=80):
    """Return deterministic random frames, Blackman-windowed and scaled toward 16-bit range."""
    np.random.seed(98765)
    num_frames = int(len(source) / hopsize) + 1
    noise = np.random.randn(num_frames, frame_len)
    return 0.5 * 32768.0 * (noise * pysptk.blackman(frame_len))


# audio, _ = lbr.load("data/SF1/100008.wav", sr=None, dtype=np.float64)
audio, _ = lbr.load(lbr.util.example_audio_file(), sr=None)
hopsize = 80

windowed = __dummy_windowed_frames(
    audio, frame_len=frame_length, hopsize=hop_length)

# frame the real audio and apply a (non-normalized) Hamming window
frames = lbr.util.frame(
    audio, frame_length=frame_length, hop_length=hop_length).astype(np.float64).T
frames *= pysptk.hamming(frame_length, normalize=0)
print(type(frames), frames.shape)

# mcep = np.apply_along_axis(pysptk.mcep, 1, frames, order=order, alpha=alpha).T
# per-frame LPC for both the dummy frames and the real frames
lpcs = np.apply_along_axis(pysptk.lpc, 1, windowed, order=order)
lpcs1 = np.apply_along_axis(pysptk.lpc, 1, frames, order=order)
# for frame in frames:
#     print(frame.shape)
#     lpc = pysptk.lpc(frame)
print(type(lpcs), lpcs.shape)

lsp = pysptk.lpc2lsp(lpcs)
print(type(lsp), lsp.shape)