Ejemplo n.º 1
0
from scipy.signal import filtfilt

from chapter2_基础.soundBase import *
from chapter3_分析实验.lpc import lpc_coeff

# Select a font with CJK glyphs for the Chinese titles/legend and keep the
# minus sign renderable with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Local import: LPC prediction needs the one-pass causal filter. filtfilt runs
# the filter forward and backward, which squares the magnitude response and is
# not the linear predictor.
from scipy.signal import lfilter

data, fs = soundBase('C6_1_y.wav').audioread()
N = len(data)
time = [i / fs for i in range(N)]  # time axis in seconds
p = 12  # order handed to lpc_coeff
ar, g = lpc_coeff(data, p)

# Drop the leading coefficient so that -ar contains only the predictor taps
# (presumably ar[0] is the leading 1 of A(z) -- confirm against lpc_coeff).
ar[0] = 0
est_x = lfilter(-ar, [1], data)

# Top panel: original signal against the prediction.
plt.subplot(2, 1, 1)
plt.plot(time, data, 'k')
plt.plot(time, est_x, 'c')
plt.title('LPC解码')
plt.legend(['信号', '解码信号'])
# Bottom panel: prediction error per sample.
plt.subplot(2, 1, 2)
plt.plot(est_x - data)
plt.title('误差')
plt.savefig('images/LPC解码.png')
plt.close()
Ejemplo n.º 2
0
from scipy.io import wavfile
import matplotlib.pyplot as plt
from chapter3_分析实验.windows import *
from chapter3_分析实验.timefeature import *
from chapter2_基础.soundBase import *

# audioread() is unpacked into three values here; going by the names they are
# samples, sampling rate and bit depth.
data, fs, nbits = soundBase('C3_2_y.wav').audioread()

# Framing set-up: window length, frame shift and the window itself.
wlen = 200
inc = 100
win = hanning_window(wlen)
N = len(data)
time = [n / fs for n in range(N)]

# Frame-level features computed directly from the waveform.
EN = STEn(data, win, inc)  # short-time energy
Mn = STMn(data, win, inc)  # short-time average magnitude
Zcr = STZcr(data, win, inc)  # short-time zero-crossing rate

# The remaining features work on the transposed frame matrix.
X = enframe(data, win, inc).T
Ac = STAc(X).T.flatten()
Amdf = STAmdf(X).flatten()

fig = plt.figure(figsize=(14, 13))
plt.subplot(3, 1, 1)
Ejemplo n.º 3
0
from chapter2_基础.soundBase import *
from chapter3_分析实验.lpc import *

data, fs = soundBase('C3_5_y.wav').audioread()
L = 240  # number of samples in the analysed frame
p = 12  # order handed to lpc_coeff
x = data[8000:8000 + L]
ar, G = lpc_coeff(x, p)
nfft = 512
W2 = nfft // 2  # only the first half of the spectrum is plotted
m = np.arange(W2)
Y = np.fft.fft(x, nfft)
Y1 = lpcff(ar, W2)
plt.subplot(2, 1, 1)
plt.plot(x)
plt.subplot(2, 1, 2)
# Magnitudes in dB: 20 * log10(|.|). The natural log would stretch the scale
# by a factor of ln(10).
plt.plot(m, 20 * np.log10(np.abs(Y[m])))
plt.plot(m, 20 * np.log10(np.abs(Y1[m])))
plt.savefig('images/lpcff.png')
plt.close()
Ejemplo n.º 4
0
from chapter2_基础.soundBase import *
from chapter4_特征提取.pitch_detection import *

data, fs = soundBase('C4_2_y.wav').audioread()
# Remove the mean and scale to unit peak amplitude (both in place).
data -= np.mean(data)
data /= np.max(np.abs(data))
wlen = 320
inc = 80
N = len(data)
time = [i / fs for i in range(N)]
T1 = 0.05

# 4.2.1
voiceseg, vosl, SF, Ef = pitch_vad(data, wlen, inc, T1)
fn = len(SF)
frameTime = FrameTimeC(fn, wlen, inc, fs)

plt.figure(figsize=(14, 8))

plt.subplot(5, 1, 1)
plt.plot(time, data)
plt.subplot(5, 1, 2)
plt.plot(frameTime, Ef)
# Mark every detected segment on the Ef curve. Each marker is drawn at the
# curve value of its own frame, so the end marker uses the end index for both
# coordinates.
for i in range(vosl):
    seg_start = voiceseg[i]['start']
    seg_end = voiceseg[i]['end']
    plt.plot(frameTime[seg_start], Ef[seg_start], '.k')
    plt.plot(frameTime[seg_end], Ef[seg_end], 'or')
if vosl > 0:
    # One legend call is enough; it is skipped when no segment was drawn,
    # exactly as when the call lived inside the loop.
    plt.legend(['能熵比', 'start', 'end'])

# 4.2.3
voiceseg, vsl, SF, Ef, period = pitch_Ceps(data, wlen, inc, T1, fs, miniL=10)
            else:
                xx[j] = x[j] // 2 * 2 + 1
    return xx, m_len


def extract_message(x, m_len, nBits=1):
    """Read the hidden bits back from the parity of the first samples.

    Args:
        x: 1-D sequence of sample values carrying the message.
        m_len: Number of message bits to read, starting at ``x[0]``.
        nBits: Bits hidden per sample; only ``1`` is supported.

    Returns:
        A float array of shape ``(m_len, 1)`` whose entry ``j`` is
        ``x[j] % 2``.

    Raises:
        SystemExit: If ``nBits`` is not 1.
        IndexError: If ``x`` holds fewer than ``m_len`` samples.
    """
    if nBits != 1:
        # Raise SystemExit directly: same effect as calling exit(), but it
        # does not rely on the ``site`` module having installed that helper.
        raise SystemExit('Only nBits=1 support now......')
    meg = np.zeros((m_len, nBits))
    samples = np.asarray(x[:m_len])
    if samples.shape[0] < m_len:
        raise IndexError('x holds fewer than m_len samples')
    # Parity of every carrier sample in one vectorised step.
    meg[:, 0] = samples % 2
    return meg


# Load the carrier audio and ask audioread() for the extra third value as well.
data, fs, bits = soundBase('C8_1_y.wav').audioread(return_nbits=True)
# Offset the samples by +1 and scale them by 2**(bits - 1).
data16 = (data + 1) * np.power(2, bits - 1)
nBits = 1
s = loadmat('C8_1_y.DAT')

# Embed the first row of the stored message, then read it straight back.
secret = s['message'][0]
x_embed, m_len = hide_message(data16, secret, 1)
meg_rec = extract_message(x_embed, m_len, 1)

# Three stacked panels: carrier, carrier with message, and their difference.
plt.figure(figsize=(14, 12))
for row, curve in enumerate((data16, x_embed, data16 - x_embed), start=1):
    plt.subplot(3, 1, row)
    plt.plot(curve)
Ejemplo n.º 6
0
from chapter2_基础.soundBase import *
from chapter5_语音降噪.Wavelet import *

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False


def awgn(x, snr, rng=None):
    """Add white Gaussian noise to ``x`` at a requested signal-to-noise ratio.

    Args:
        x: Real-valued signal samples; any array-like of any shape.
        snr: Target SNR in dB, relative to the mean power of ``x``.
        rng: Optional ``numpy.random.Generator`` for reproducible noise. When
            omitted, the global ``numpy.random`` state is used.

    Returns:
        A float array with the shape of ``x`` holding ``x`` plus the noise.
    """
    # Work in float so list input is accepted and integer samples cannot
    # overflow when squared.
    x = np.asarray(x, dtype=float)
    snr_linear = 10 ** (snr / 10.0)
    # Mean power over every element, so multi-dimensional input works too.
    xpower = np.mean(x ** 2)
    npower = xpower / snr_linear
    if rng is None:
        noise = np.random.standard_normal(x.shape)
    else:
        noise = rng.standard_normal(x.shape)
    return x + noise * np.sqrt(npower)


data, fs = soundBase('C5_4_y.wav').audioread()
# Remove the mean and scale to unit peak amplitude (both in place).
data -= np.mean(data)
data /= np.max(np.abs(data))
N = len(data)
SNR = 5
# Noisy version of the signal; SNR is the value handed to awgn().
s = awgn(data, SNR)
time = [n / fs for n in range(N)]  # time axis in seconds

# Settings shared by all four denoisers below.
wname, jN = 'db7', 6

res_s = Wavelet_Soft(s, jN, wname)
res_d = Wavelet_Hard(s, jN, wname)
res_hs = Wavelet_hardSoft(s, jN, wname)
res_a = Wavelet_average(s, jN, wname)

plt.figure(figsize=(14, 10))
Ejemplo n.º 7
0
from chapter3_分析实验.windows import *
from chapter3_分析实验.timefeature import *
from chapter2_基础.soundBase import *
from chapter3_分析实验.倒谱计算 import *

data, fs = soundBase('C3_4_y_1.wav').audioread()
# Analyse only the first nlen samples.
nlen = 1000
y = data[:nlen]
N = 1024
time = [n / fs for n in range(nlen)]
z = cceps(y)
zr = rcceps(y)
yy = icceps(z)

# One panel per curve, stacked top to bottom in this order: input signal,
# complex cepstrum, real cepstrum, signal rebuilt from the complex cepstrum.
panels = ((y, '原始信号'), (z, '复倒谱'), (zr, '实倒谱'), (yy, '重构信号'))
for row, (curve, caption) in enumerate(panels, start=1):
    plt.subplot(4, 1, row)
    plt.plot(time, curve)
    plt.title(caption)
plt.savefig('images/倒谱.png')
plt.close()
Ejemplo n.º 8
0
    scores1 = np.zeros(N)
    scores2 = np.zeros(N)
    scores3 = np.zeros(N)

    for i in range(N):
        scores1[i] = myDCW(CMN(features['p1_{}'.format(i)]), r)
        scores2[i] = myDCW(CMN(features['p2_{}'.format(i)]), r)
        scores3[i] = myDCW(CMN(features['p2_{}'.format(i)]), r)
    return scores1, scores2, scores3


if __name__ == '__main__':
    # Build the template set: one feature entry per recording 0-9 in each of
    # the three folders, stored under '<folder>_<index>'.
    features = {}
    templates = (
        ('p1/{}.wav', 'p1_{}'),
        ('p2/{}.wav', 'p2_{}'),
        ('p3/{}.wav', 'p3_{}'),
    )
    for wav_pattern, key_pattern in templates:
        for i in range(10):
            data, fs, bits = soundBase(wav_pattern.format(i)).audioread()
            # Run my_vad on the recording, then hand its output to mfccf.
            speechIn1 = my_vad(data)
            fm = mfccf(12, speechIn1, fs)
            features[key_pattern.format(i)] = fm

    soundBase('mysound.wav').audiorecorder(rate=16000, channels=1)
Ejemplo n.º 9
0
from chapter2_基础.soundBase import *
from chapter3_分析实验.dct import *
from chapter3_分析实验.mel import *

# The third value returned by audioread() is not used in this script.
data, fs, _ = soundBase('C3_4_y_4.wav').audioread()

# Settings handed to Nmfcc below.
wlen, inc, num = 200, 80, 8
# Scale by the largest sample value (not the largest magnitude).
peak = np.max(data)
data = data / peak
mfcc = Nmfcc(data, fs, num, wlen, inc)
Ejemplo n.º 10
0
            else:
                xx[j] = x[j] // 2 * 2 + 1
    return xx, m_len


def extract_message(x, m_len, nBits=1):
    """Read the hidden bits back from the parity of the first samples.

    Args:
        x: 1-D sequence of sample values carrying the message.
        m_len: Number of message bits to read, starting at ``x[0]``.
        nBits: Bits hidden per sample; only ``1`` is supported.

    Returns:
        A float array of shape ``(m_len, 1)`` whose entry ``j`` is
        ``x[j] % 2``.

    Raises:
        SystemExit: If ``nBits`` is not 1.
        IndexError: If ``x`` holds fewer than ``m_len`` samples.
    """
    if nBits != 1:
        # Raise SystemExit directly: same effect as calling exit(), but it
        # does not rely on the ``site`` module having installed that helper.
        raise SystemExit('Only nBits=1 support now......')
    meg = np.zeros((m_len, nBits))
    samples = np.asarray(x[:m_len])
    if samples.shape[0] < m_len:
        raise IndexError('x holds fewer than m_len samples')
    # Parity of every carrier sample in one vectorised step.
    meg[:, 0] = samples % 2
    return meg


# Load the carrier audio; audioread() is unpacked into three values here.
data, fs, bits = soundBase('C8_1_y.wav').audioread()
# Offset the samples by +1 and scale them by 2**(bits - 1).
data16 = (data + 1) * np.power(2, bits - 1)
nBits = 1
s = loadmat('C8_1_y.DAT')

# Embed the first row of the stored message, then read it straight back.
secret = s['message'][0]
x_embed, m_len = hide_message(data16, secret, 1)
meg_rec = extract_message(x_embed, m_len, 1)

# Three stacked panels: carrier, carrier with message, and their difference.
plt.figure(figsize=(14, 12))
for row, curve in enumerate((data16, x_embed, data16 - x_embed), start=1):
    plt.subplot(3, 1, row)
    plt.plot(curve)
Ejemplo n.º 11
0
from chapter3_分析实验.timefeature import *
from chapter7_语音合成.flipframe import *
from chapter3_分析实验.C3_1_y_1 import enframe
from chapter3_分析实验.lpc import lpc_coeff
from chapter4_特征提取.共振峰估计 import *

from chapter4_特征提取.pitch_detection import *

from chapter7_语音合成.myfilter import *

from scipy.signal import lfilter

# Select a font with CJK glyphs and keep the minus sign renderable with it.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

data, fs = soundBase('C7_3_y.wav').audioread()
# Remove the mean and scale to unit peak amplitude (both in place).
data -= np.mean(data)
data /= np.max(np.abs(data))
# First-order FIR filter y[n] = x[n] - 0.99 * x[n-1].
data = lfilter([1, -0.99], 1, data)
N = len(data)
time = [n / fs for n in range(N)]  # time axis in seconds
wlen = 240
inc = 80
overlap = wlen - inc
n2 = list(range(wlen // 2))
# Rising ramp 0 .. (overlap-1)/overlap and the same ramp reversed.
w1 = [k / overlap for k in range(overlap)]
w2 = w1[::-1]
wnd = np.hamming(wlen)
X = enframe(data, wnd, inc)
fn = X.shape[0]
# Sum of squared values along axis 1 of the frame matrix.
Etmp = np.sum(X ** 2, axis=1)
Ejemplo n.º 12
0
from chapter2_基础.soundBase import *
from chapter11_说话人识别.VQ import *
from chapter10_语音识别.DTW.DCW import mfccf

# Select a font with CJK glyphs and keep the minus sign renderable with it.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

k = 8  # second argument handed to lbg
N = 4  # number of training files SX1 .. SXN

## Build the codebooks
# One slot per training file. A plain list is used because each slot holds
# whatever lbg() returns, and a float array of length N can neither store that
# nor be indexed at position N.
u = [None] * N
for i in range(1, N + 1):
    s = 'VQ_data/SX' + str(i) + '.WAV'
    # s = 'VQ_data/mysound.WAV'
    data, fs, bits = soundBase(s).audioread()
    data /= np.max(data)
    mel = mfccf(12, data, fs)
    v = lbg(mel.T, k)
    # File numbers start at 1, list positions at 0.
    u[i - 1] = v

## 识别过程
M = 4  # 每个人有M个待识别的样本
l = 5

# 这部分需要用新的语音信号对于MATLAB调试查看结果
for iii in range(l):
    for i in range(M):
        Dstu = np.zeros(N)
        s = 'VQ_data/TX{}_{}.wav'.format(iii, i)
        data, fs, bits = soundBase(s).audioread()