# LPC analysis/synthesis demo: estimate the signal from its LPC
# prediction coefficients and plot the reconstruction error.
from scipy.signal import filtfilt
from chapter2_基础.soundBase import *
from chapter3_分析实验.lpc import lpc_coeff

# Use a CJK-capable font so the Chinese titles/legends render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

data, fs = soundBase('C6_1_y.wav').audioread()
N = len(data)
time = [i / fs for i in range(N)]  # time axis in seconds
p = 12  # LPC order
ar, g = lpc_coeff(data, p)
# Zero the leading coefficient so that -ar = [0, -a1, ..., -ap] acts as the
# one-step linear predictor of the signal.
ar[0] = 0
# NOTE(review): the classic predictor uses causal lfilter; filtfilt runs the
# filter forward and backward (zero-phase, non-causal) — confirm intended.
est_x = filtfilt(-ar, [1], data)
plt.subplot(2, 1, 1)
plt.plot(time, data, 'k')
plt.plot(time, est_x, 'c')
plt.title('LPC解码')
plt.legend(['信号', '解码信号'])
plt.subplot(2, 1, 2)
plt.plot(est_x - data)
plt.title('误差')
plt.savefig('images/LPC解码.png')
plt.close()
# Short-time analysis demo: energy, average magnitude, zero-crossing rate,
# autocorrelation and AMDF of a framed speech signal.
from scipy.io import wavfile
import matplotlib.pyplot as plt
from chapter3_分析实验.windows import *
from chapter3_分析实验.timefeature import *
from chapter2_基础.soundBase import *

data, fs, nbits = soundBase('C3_2_y.wav').audioread()
inc = 100   # frame shift (samples)
wlen = 200  # frame length (samples)
win = hanning_window(wlen)
N = len(data)
time = [i / fs for i in range(N)]  # time axis in seconds
EN = STEn(data, win, inc)    # short-time energy
Mn = STMn(data, win, inc)    # short-time average magnitude
Zcr = STZcr(data, win, inc)  # short-time zero-crossing rate
X = enframe(data, win, inc)
X = X.T  # one frame per column, as expected by the correlation helpers
Ac = STAc(X)  # short-time autocorrelation
Ac = Ac.T
Ac = Ac.flatten()
Amdf = STAmdf(X)  # short-time average magnitude difference function
Amdf = Amdf.flatten()
fig = plt.figure(figsize=(14, 13))
plt.subplot(3, 1, 1)
# LPC spectrum demo: compare the FFT magnitude spectrum of one frame with
# the spectral envelope reconstructed from its LPC coefficients.
from chapter2_基础.soundBase import *
from chapter3_分析实验.lpc import *

data, fs = soundBase('C3_5_y.wav').audioread()
L = 240  # frame length (samples)
p = 12   # LPC order
x = data[8000:8000 + L]
ar, G = lpc_coeff(x, p)
nfft = 512
W2 = nfft // 2  # keep only the non-negative frequency half
m = np.arange(W2)
Y = np.fft.fft(x, nfft)
Y1 = lpcff(ar, W2)  # LPC-derived spectral envelope
plt.subplot(2, 1, 1)
plt.plot(x)
plt.subplot(2, 1, 2)
# BUG FIX: magnitude in decibels is 20*log10(|Y|); np.log is the natural
# logarithm and mis-scales the dB axis by a factor of ln(10) ≈ 2.3.
plt.plot(m, 20 * np.log10(np.abs(Y[m])))
plt.plot(m, 20 * np.log10(np.abs(Y1[m])))
plt.savefig('images/lpcff.png')
plt.close()
# Pitch detection demo: energy-entropy-ratio VAD (4.2.1) followed by
# cepstrum-based pitch tracking (4.2.3).
from chapter2_基础.soundBase import *
from chapter4_特征提取.pitch_detection import *

data, fs = soundBase('C4_2_y.wav').audioread()
data -= np.mean(data)         # remove DC offset
data /= np.max(np.abs(data))  # normalize to [-1, 1]
wlen = 320  # frame length (samples)
inc = 80    # frame shift (samples)
N = len(data)
time = [i / fs for i in range(N)]  # time axis in seconds
T1 = 0.05   # VAD threshold on the energy-entropy ratio
# 4.2.1 voiced-segment detection
voiceseg, vosl, SF, Ef = pitch_vad(data, wlen, inc, T1)
fn = len(SF)
frameTime = FrameTimeC(fn, wlen, inc, fs)
plt.figure(figsize=(14, 8))
plt.subplot(5, 1, 1)
plt.plot(time, data)
plt.subplot(5, 1, 2)
plt.plot(frameTime, Ef)
for i in range(vosl):
    plt.subplot(5, 1, 2)
    plt.plot(frameTime[voiceseg[i]['start']], Ef[voiceseg[i]['start']], '.k')
    # BUG FIX: the end marker must be drawn at the segment end's Ef value;
    # the original used Ef[voiceseg[i]['start']], placing the red circle at
    # the wrong height.
    plt.plot(frameTime[voiceseg[i]['end']], Ef[voiceseg[i]['end']], 'or')
    plt.legend(['能熵比', 'start', 'end'])
# 4.2.3 cepstrum-based pitch period extraction
voiceseg, vsl, SF, Ef, period = pitch_Ceps(data, wlen, inc, T1, fs, miniL=10)
# (fragment: tail of hide_message — the odd-value branch of the LSB embedder;
# the function header lies outside this view, indentation reconstructed)
        else:
            # embed bit 1: force the sample's least-significant bit to 1
            xx[j] = x[j] // 2 * 2 + 1
    return xx, m_len


def extract_message(x, m_len, nBits=1):
    """Recover an LSB-embedded message of m_len bits from signal x.

    Only nBits=1 (one bit per sample) is supported.
    """
    if nBits != 1:
        exit('Only nBits=1 support now......')
    meg = np.zeros((m_len, nBits))
    for i in range(nBits):
        for j in range(m_len):
            meg[j, i] = x[j] % 2  # read the least-significant bit
    return meg


data, fs, bits = soundBase('C8_1_y.wav').audioread(return_nbits=True)
# Map [-1, 1] float samples onto the unsigned integer PCM range so the
# LSB manipulation above is meaningful.
data16 = (data + 1) * np.power(2, bits - 1)
nBits = 1
s = loadmat('C8_1_y.DAT')
x_embed, m_len = hide_message(data16, s['message'][0], 1)
meg_rec = extract_message(x_embed, m_len, 1)
plt.figure(figsize=(14, 12))
plt.subplot(3, 1, 1)
plt.plot(data16)
plt.subplot(3, 1, 2)
plt.plot(x_embed)
plt.subplot(3, 1, 3)
plt.plot(data16 - x_embed)
# Wavelet-denoising demo: add white Gaussian noise at a chosen SNR, then
# compare soft / hard / mixed / averaged wavelet thresholding results.
from chapter2_基础.soundBase import *
from chapter5_语音降噪.Wavelet import *

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


def awgn(x, snr):
    """Return x plus white Gaussian noise at the given SNR (in dB)."""
    snr = 10**(snr / 10.0)          # dB -> linear power ratio
    xpower = np.sum(x**2) / len(x)  # mean signal power
    npower = xpower / snr           # noise power needed for the target SNR
    return x + np.random.randn(len(x)) * np.sqrt(npower)


data, fs = soundBase('C5_4_y.wav').audioread()
data -= np.mean(data)         # remove DC offset
data /= np.max(np.abs(data))  # normalize to [-1, 1]
SNR = 5
N = len(data)
s = awgn(data, SNR)  # noisy observation
time = [i / fs for i in range(N)]  # time axis in seconds
wname = 'db7'  # Daubechies-7 mother wavelet
jN = 6         # decomposition depth
res_s = Wavelet_Soft(s, jN, wname)
res_d = Wavelet_Hard(s, jN, wname)
res_hs = Wavelet_hardSoft(s, jN, wname)
res_a = Wavelet_average(s, jN, wname)
plt.figure(figsize=(14, 10))
# Cepstrum demo: complex cepstrum, real cepstrum and signal reconstruction
# for a 1000-sample excerpt.
from chapter3_分析实验.windows import *
from chapter3_分析实验.timefeature import *
from chapter2_基础.soundBase import *
from chapter3_分析实验.倒谱计算 import *

data, fs = soundBase('C3_4_y_1.wav').audioread()
nlen = 1000  # number of samples analysed
y = data[:nlen]
N = 1024  # NOTE(review): unused below — looks like a leftover FFT size
time = [i / fs for i in range(nlen)]  # time axis in seconds
z = cceps(y)    # complex cepstrum
zr = rcceps(y)  # real cepstrum
yy = icceps(z)  # signal reconstructed from the complex cepstrum
plt.subplot(4, 1, 1)
plt.plot(time, y)
plt.title('原始信号')
plt.subplot(4, 1, 2)
plt.plot(time, z)
plt.title('复倒谱')
plt.subplot(4, 1, 3)
plt.plot(time, zr)
plt.title('实倒谱')
plt.subplot(4, 1, 4)
plt.plot(time, yy)
plt.title('重构信号')
plt.savefig('images/倒谱.png')
plt.close()
scores1 = np.zeros(N) scores2 = np.zeros(N) scores3 = np.zeros(N) for i in range(N): scores1[i] = myDCW(CMN(features['p1_{}'.format(i)]), r) scores2[i] = myDCW(CMN(features['p2_{}'.format(i)]), r) scores3[i] = myDCW(CMN(features['p2_{}'.format(i)]), r) return scores1, scores2, scores3 if __name__ == '__main__': # 制作模板集 features = {} for i in range(10): data, fs, bits = soundBase('p1/{}.wav'.format(i)).audioread() speechIn1 = my_vad(data) fm = mfccf(12, speechIn1, fs) features['p1_{}'.format(i)] = fm for i in range(10): data, fs, bits = soundBase('p2/{}.wav'.format(i)).audioread() speechIn1 = my_vad(data) fm = mfccf(12, speechIn1, fs) features['p2_{}'.format(i)] = fm for i in range(10): data, fs, bits = soundBase('p3/{}.wav'.format(i)).audioread() speechIn1 = my_vad(data) fm = mfccf(12, speechIn1, fs) features['p3_{}'.format(i)] = fm soundBase('mysound.wav').audiorecorder(rate=16000, channels=1)
# MFCC demo: compute `num` mel-frequency cepstral coefficients of a framed
# speech signal.
from chapter2_基础.soundBase import *
from chapter3_分析实验.dct import *
from chapter3_分析实验.mel import *

data, fs, _ = soundBase('C3_4_y_4.wav').audioread()
wlen = 200  # frame length (samples)
inc = 80    # frame shift (samples)
num = 8     # number of cepstral coefficients kept
# NOTE(review): sibling scripts normalize by np.max(np.abs(data)); dividing
# by np.max(data) differs when the negative peak dominates — confirm intent.
data = data / np.max(data)
mfcc = Nmfcc(data, fs, num, wlen, inc)
# (fragment: tail of hide_message — the odd-value branch of the LSB embedder;
# the function header lies outside this view, indentation reconstructed)
        else:
            # embed bit 1: force the sample's least-significant bit to 1
            xx[j] = x[j] // 2 * 2 + 1
    return xx, m_len


def extract_message(x, m_len, nBits=1):
    """Recover an LSB-embedded message of m_len bits from signal x.

    Only nBits=1 (one bit per sample) is supported.
    """
    if nBits != 1:
        exit('Only nBits=1 support now......')
    meg = np.zeros((m_len, nBits))
    for i in range(nBits):
        for j in range(m_len):
            meg[j, i] = x[j] % 2  # read the least-significant bit
    return meg


# NOTE(review): a sibling script calls audioread(return_nbits=True) to get
# three return values — confirm which audioread signature this file targets.
data, fs, bits = soundBase('C8_1_y.wav').audioread()
# Map [-1, 1] float samples onto the unsigned integer PCM range so the
# LSB manipulation above is meaningful.
data16 = (data + 1) * np.power(2, bits - 1)
nBits = 1
s = loadmat('C8_1_y.DAT')
x_embed, m_len = hide_message(data16, s['message'][0], 1)
meg_rec = extract_message(x_embed, m_len, 1)
plt.figure(figsize=(14, 12))
plt.subplot(3, 1, 1)
plt.plot(data16)
plt.subplot(3, 1, 2)
plt.plot(x_embed)
plt.subplot(3, 1, 3)
plt.plot(data16 - x_embed)
# Speech-synthesis preprocessing: pre-emphasis, framing, and per-frame energy.
from chapter3_分析实验.timefeature import *
from chapter7_语音合成.flipframe import *
from chapter3_分析实验.C3_1_y_1 import enframe
from chapter3_分析实验.lpc import lpc_coeff
from chapter4_特征提取.共振峰估计 import *
from chapter4_特征提取.pitch_detection import *
from chapter7_语音合成.myfilter import *
from scipy.signal import lfilter

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

data, fs = soundBase('C7_3_y.wav').audioread()
data -= np.mean(data)         # remove DC offset
data /= np.max(np.abs(data))  # normalize to [-1, 1]
data = lfilter([1, -0.99], 1, data)  # pre-emphasis: boost high frequencies
N = len(data)
time = [i / fs for i in range(N)]  # time axis in seconds
wlen = 240  # frame length (samples)
inc = 80    # frame shift (samples)
overlap = wlen - inc
n2 = [i for i in range(wlen // 2)]
# linear cross-fade ramps (rising w1, falling w2) for overlap-add synthesis
w1 = [i / overlap for i in range(overlap)]
w2 = [i / overlap for i in range(overlap - 1, -1, -1)]
wnd = np.hamming(wlen)
X = enframe(data, wnd, inc)
fn = X.shape[0]  # number of frames
Etmp = np.sum(np.power(X, 2), axis=1)  # per-frame energy
# VQ speaker-recognition demo: train one LBG codebook per enrolled speaker,
# then score test utterances against every codebook.
from chapter2_基础.soundBase import *
from chapter11_说话人识别.VQ import *
from chapter10_语音识别.DTW.DCW import mfccf

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

k = 8  # codebook size (number of LBG centroids)
N = 4  # number of enrolled speakers

## Codebook training
# BUG FIX: the original pre-allocated `u = np.zeros(N)` and wrote `u[i] = v`,
# which (a) cannot store a codebook object in a float slot and (b) indexes
# out of bounds at i == N because the loop runs 1..N. Collect the codebooks
# in a list instead; u[0]..u[N-1] hold speakers 1..N.
u = []
for i in range(1, N + 1):
    s = 'VQ_data/SX' + str(i) + '.WAV'
    # s = 'VQ_data/mysound.WAV'
    data, fs, bits = soundBase(s).audioread()
    data /= np.max(data)
    mel = mfccf(12, data, fs)  # MFCC features, one frame per row
    v = lbg(mel.T, k)          # LBG-trained codebook for this speaker
    u.append(v)

## Recognition
M = 4  # number of test samples per speaker
l = 5  # use fresh recordings here; cross-check results with the MATLAB code
for iii in range(l):
    for i in range(M):
        Dstu = np.zeros(N)  # distortion of this sample vs. each codebook
        # NOTE(review): iii starts at 0 — confirm the TX file names are not
        # 1-indexed like the MATLAB originals.
        s = 'VQ_data/TX{}_{}.wav'.format(iii, i)
        data, fs, bits = soundBase(s).audioread()