Example No. 1
def mel_dist(x1, x2, fs, num, wlen, inc):
	"""
	计算两信号x1,x2的MFCC参数和距离
	:param x1: signal 1
	:param x2: signal 2
	:param fs: sample frequency
	:param num: the number we select in MFCC
	:param wlen: frame length
	:param inc: frame shift
	:return Dcep: distance
	:return Ccep1, Ccep2: num MFCC
	"""
	M = MFCC()
	ccc1 = M.mfcc(x1, fs, num, wlen, inc)		# MFCC of each signal
	ccc2 = M.mfcc(x2, fs, num, wlen, inc)
	fn1 = np.shape(ccc1)[0]		# frame number
	Ccep1 = ccc1[:, 0 : num]
	Ccep2 = ccc2[:, 0 : num]

	Dcep = np.zeros(fn1)	# distance
	for i in range(fn1):
		Cn1 = Ccep1[i, :]
		Cn2 = Ccep2[i, :]
		Dstu = 0
		for k in range(num):
			Dstu = Dstu + (Cn1[k] - Cn2[k]) ** 2

		Dcep[i] = np.sqrt(Dstu)

	return Dcep, Ccep1, Ccep2
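
A minimal usage sketch for mel_dist, assuming the MFCC class this snippet relies on; the test signal, noise level, and frame parameters below are made-up values:

import numpy as np

# hypothetical inputs: a 1 s, 8 kHz sine wave and a noisy copy of it
fs = 8000
t = np.arange(fs) / fs
clean = np.sin(2 * np.pi * 440 * t)
noisy = clean + 0.05 * np.random.randn(fs)

Dcep, Ccep1, Ccep2 = mel_dist(clean, noisy, fs, num=16, wlen=200, inc=80)
print('mean cepstral distance:', Dcep.mean())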
Example No. 2
def super_vector(test_file_name, ubm_file):
    # read the waveform and keep only the frames selected by energy-based VAD
    wav = mywave()
    waveData = wav.WaveRead(test_file_name)
    waveVadIdx = vad(waveData**2)
    waveData = waveData[waveVadIdx]

    # extract 12 MFCC coefficients (40 filters, 300-3400 Hz band, 16 kHz input)
    MFCC_obj = MFCC(40, 12, 300, 3400, 0.97, 16000, 50, 0.0256, 256)
    MFCC_coef = MFCC_obj.sig2s2mfc(waveData)

    # adapt a 128-mixture UBM to this utterance and return the adapted means
    ubm = GMM(n_mix=128, n_dim=12)
    ubm.read(ubm_file)
    ubm.adapt(MFCC_coef)

    return ubm.means
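
A possible follow-up, assuming ubm.means is an (n_mix, n_dim) array as the constructor suggests: flatten the adapted means into a single GMM supervector. The file names below are made up.

import numpy as np

means = super_vector('test_utterance.wav', 'ubm_128.model')
supervector = np.asarray(means).reshape(-1)  # 128 mixtures x 12 dims -> (1536,)
print(supervector.shape)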
Example No. 3
def test(signal, fs, feat_list):
    # scale the signal by its total absolute deviation from the mean
    mean = np.average(signal)
    energy = np.sum(np.abs(signal - mean))
    signal = signal / energy * 100

    mfcc = MFCC.MFCC(signal, fs, Frame_Len, Hop_Len)

    # pick the template with the smallest DTW distance
    digit = 0
    dis = DTW.DTW(mfcc, feat_list[0])
    for i in range(1, len(feat_list)):
        c = DTW.DTW(mfcc, feat_list[i])
        if c < dis:
            digit = i
            dis = c

    return digit, dis
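
DTW.DTW is this project's own routine; for reference, a minimal dynamic-time-warping distance between two MFCC frame matrices could look like the sketch below (an illustration, not the project's implementation):

import numpy as np

def dtw_distance(a, b):
    # a: (n, d) and b: (m, d) matrices of MFCC frames
    n, m = len(a), len(b)
    D = np.full((n + 1, m + 1), np.inf)
    D[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = np.linalg.norm(a[i - 1] - b[j - 1])  # local frame distance
            D[i, j] = cost + min(D[i - 1, j],       # insertion
                                 D[i, j - 1],       # deletion
                                 D[i - 1, j - 1])   # match
    return D[n, m]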
Example No. 4
    speech = Speech()
    xx, fs = speech.audioread(filename, 8000)
    xx = xx - np.mean(xx)  # remove DC offset
    x = xx / np.max(np.abs(xx))  # normalize amplitude to [-1, 1]
    N = len(x)
    time = np.arange(N) / fs
    noisy = Noisy()
    signal, _ = noisy.Gnoisegen(x, SNR)  # add noise
    wnd = np.hamming(wlen)  # window function
    overlap = wlen - inc
    NIS = int((IS * fs - wlen) / inc + 1)  # frame count of the leading noise-only segment
    y = speech.enframe(signal, list(wnd), inc).T
    fn = y.shape[1]  # frame number
    frameTime = speech.FrameTime(fn, wlen, inc, fs)  # frame to time

    Mfcc = MFCC()
    ccc = Mfcc.mfcc(signal, fs, 16, wlen, inc)  # MFCC
    fn1 = ccc.shape[0]  # frame number
    frameTime1 = frameTime[2:fn - 2]
    Ccep = ccc[:, 0:16]  # first 16 MFCC coefficients
    C0 = np.mean(Ccep[0:5, :], axis=0)  # approximate noise MFCC: average of the first 5 frames
    Dcep = np.zeros(fn1)  # cepstral distance of each frame
    for i in range(5, fn1):
        Cn = Ccep[i, :]  # one frame MFCC cepstrum coefficient
        Dstu = 0
        for k in range(16):  # calculate the MFCC cepstrum distance
            Dstu += (Cn[k] - C0[k])**2  # between each frame and noise
        Dcep[i] = np.sqrt(Dstu)
    Dcep[0:5] = Dcep[5]
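
A plausible continuation, thresholding the cepstral distance for a simple speech/noise decision; the 1.5 scale factor is a made-up value, not from the source:

Dstm = np.mean(Dcep[:NIS])  # mean distance over the leading noise-only frames
voiced = Dcep > 1.5 * Dstm  # frames far enough from the noise cepstrum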
Example No. 5
def template(signal, fs):
    # scale the signal by its total absolute deviation from the mean,
    # then return its MFCC matrix as the template for this word
    mean = np.average(signal)
    energy = np.sum(np.abs(signal - mean))
    signal = signal / energy * 100
    mfcc = MFCC.MFCC(signal, fs, Frame_Len, Hop_Len)
    return mfcc
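
Used together with test() from Example No. 3, a hypothetical enrollment/recognition flow; the read_wav helper, file names, and unknown signal are assumptions:

feat_list = [template(read_wav('digit_%d.wav' % d), fs) for d in range(10)]
digit, dis = test(unknown, fs, feat_list)
print('recognized digit:', digit, 'DTW distance:', dis)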
Example No. 6
ubm_dir = 'train_data_for_UBM'
ubm_data_dirs = os.listdir(ubm_dir)
dim = 12
sig = np.array([])
features_M = np.ndarray(shape=(0, dim), dtype='float64')
features_F = np.ndarray(shape=(0, dim), dtype='float64')
features = np.ndarray(shape=(0, dim), dtype='float64')
wav = mywave()
for ubm_data_dir in ubm_data_dirs:
    print(ubm_data_dir)
    if ubm_data_dir == '.DS_Store':
        continue
    sig = wav.WaveRead(ubm_dir + r'/' + ubm_data_dir)
    MFCC_obj = MFCC(40, 12, 300, 3400, 0.97, 16000, 50, 0.0256, 256)
    MFCC_coef = MFCC_obj.sig2s2mfc(sig)
    #energy = np.ndarray(shape = (MFCC_coef.shape[0],1),dtype = 'float64')
    #energy[:,0] = 10*numpy.log10((MFCC_coef**2).sum(axis=1))
    #MFCC_coef = np.hstack((MFCC_coef,energy))
     """
     dtm1 = np.ndarray(shape = MFCC_coef.shape,dtype = 'float64' ) 
     #初始化dtm1
     dtm1[0:2,:] = 0
     dtm1[MFCC_coef.shape[0]-2:MFCC_coef.shape[0],:] = 0;  
     #计算dtm1
     for loop2 in range(2,MFCC_coef.shape[0]-2):
         dtm1[loop2,:] = -2*MFCC_coef[loop2-2,:]-MFCC_coef[loop2-1,:]+MFCC_coef[loop2+1,:]+2*MFCC_coef[loop2+2,:]
     dtm1 = dtm1/3;
     dtm2 = np.ndarray(shape = MFCC_coef.shape,dtype = 'float64' )
     #初始化dtm2
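
For reference, a complete version of the first-order delta computation that the commented-out block starts, keeping the same [-2, -1, +1, +2] weights and the same division by 3 as the source:

import numpy as np

def delta(coef):
    d = np.zeros_like(coef)  # first and last two frames stay zero
    for t in range(2, coef.shape[0] - 2):
        d[t] = (-2 * coef[t - 2] - coef[t - 1]
                + coef[t + 1] + 2 * coef[t + 2]) / 3.0
    return d

dtm1 = delta(MFCC_coef)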
Example No. 7
length = 10 * samprate  # 10-second pieces

ubms_dir = 'ubms'
supervector_dir = 'supervector'
if not os.path.exists(supervector_dir):
    os.mkdir(supervector_dir)

train_data_dir = 'train_data'
train_data = os.listdir(train_data_dir)

wav = mywave.mywave()

special = ['train_2013011050_mlh_M.wav', 'train_2013011055_huangyiqing_M.wav',
           'train_2013012061_huangxinyuan_M.wav']

MFCC_obj = MFCC(40, 12, 300, 3400, 0.97, samprate, 50, 0.0256, 256)
dim = 12

for train_wav in train_data:
    if train_wav == '.DS_Store':
        continue
    print(train_wav)
    if not os.path.exists(supervector_dir + r'/' + train_wav[6:16]):
        os.mkdir(supervector_dir + r'/' + train_wav[6:16])
    wave = wav.WaveRead(train_data_dir + r'/' + train_wav)
    piece_num = int(wave.shape[0] / length)
    print('number of pieces:', piece_num)
    for loop in range(piece_num):
        print('piece:', loop + 1)
        beginp = loop * samprate
Example No. 8
for key, speaker in enumerate(
        glob.glob('Train/*')):  # pick one speaker at a time in the folder Train
    name_speaker = speaker
    # print(name_speaker)
    # store the speaker along with a key in the list
    speaker_list.update({name_speaker: key})

    # to store the MFCCs of the different audio signals of the same speaker
    speaker_data = []

    # pick all the audio clips of each speaker
    for speaker_train in glob.glob(speaker + '/*.wav'):
        mfcc = feature.MFCC(speaker_train)  # extract MFCC for each audio clip
        if len(speaker_data) == 0:
            speaker_data = mfcc
        else:
            # stack the MFCCs of all audio clips of a single speaker
            speaker_data = numpy.concatenate((speaker_data, mfcc))

    # print(speaker_data.shape)
    # create a GMM using the sklearn package
    gaussian_mixture_model = mixture.GaussianMixture(
        n_components=num,
        covariance_type=matrix,
        max_iter=100,
        init_params=initial)
    gaussian_mixture_model = gaussian_mixture_model.fit(
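
Since the snippet is cut off at both ends, here is a self-contained sketch of the same per-speaker GMM training loop; feature.MFCC is assumed to be this project's extractor, and the mixture count, covariance type, and init method are made-up stand-ins for num, matrix, and initial:

import glob
import numpy
from sklearn import mixture

speaker_models = {}
for speaker in glob.glob('Train/*'):
    # stack the MFCCs of every clip of this speaker into one matrix
    data = numpy.concatenate(
        [feature.MFCC(wav) for wav in glob.glob(speaker + '/*.wav')])
    gmm = mixture.GaussianMixture(n_components=32,
                                  covariance_type='diag',
                                  max_iter=100,
                                  init_params='kmeans')
    speaker_models[speaker] = gmm.fit(data)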
Example No. 9
def extract_mfcc(signal):
    # thin wrapper over the MFCC module (renamed so it does not shadow the module)
    return MFCC.MFCC(signal)