예제 #1
0
def training(nfiltbank, orderLPC):
    """Build MFCC and LPC codebooks for the two training speakers.

    Reads train/speaker<k>.wav for k = 1..2, plays each clip back while it
    is processed, and vector-quantises the extracted features with the LBG
    algorithm.  Before returning, two MFCC dimensions (indices 4 and 6) of
    both speakers are scattered together with freshly trained centroids.

    :param nfiltbank: number of mel filter banks passed to mfcc()
    :param orderLPC:  prediction order passed to lpc()
    :return: tuple (codebooks_mfcc, codebooks_lpc) with shapes
             (2, nfiltbank, 16) and (2, orderLPC, 16)
    """
    n_speakers = 2
    n_centroids = 16
    codebooks_mfcc = np.empty((n_speakers, nfiltbank, n_centroids))
    codebooks_lpc = np.empty((n_speakers, orderLPC, n_centroids))
    train_dir = os.getcwd() + '/train'

    for spk in range(n_speakers):
        wav_path = train_dir + '/speaker' + str(spk + 1) + '.wav'
        print('Now speaker ', str(spk + 1), 'features are being trained')
        (fs, s) = read(wav_path)
        # Re-read the samples as floats; only fs is kept from read().
        s = wav_to_floats(wav_path)[:48000]
        sd.play(s, fs)  # audible playback of the clip being trained
        codebooks_mfcc[spk, :, :] = lbg(mfcc(s, fs, nfiltbank), n_centroids)
        codebooks_lpc[spk, :, :] = lbg(lpc(s, fs, orderLPC), n_centroids)

    # Visualise two MFCC dimensions for both speakers together with the
    # centroids of newly trained codebooks (first 68 frames only).
    codebooks = np.empty((2, nfiltbank, n_centroids))
    mel_coeff = np.empty((2, nfiltbank, 68))
    for spk in range(2):
        wav_path = train_dir + '/speaker' + str(spk + 1) + '.wav'
        (fs, s) = read(wav_path)
        s = s[:48000]
        mel_coeff[spk, :, :] = mfcc(s, fs, nfiltbank)[:, 0:68]
        codebooks[spk, :, :] = lbg(mel_coeff[spk, :, :], n_centroids)

    plt.figure(n_speakers + 1)
    handles = []
    for spk, colour in ((0, 'r'), (1, 'b')):
        handles.append(plt.scatter(mel_coeff[spk, 6, :], mel_coeff[spk, 4, :],
                                   s=100, color=colour, marker='o'))
        handles.append(plt.scatter(codebooks[spk, 6, :], codebooks[spk, 4, :],
                                   s=100, color=colour, marker='+'))
    s1, c1, s2, c2 = handles
    plt.grid()
    plt.legend((s1, s2, c1, c2),
               ('Child', 'Parent', 'Child centroids', 'Parent centroids'),
               scatterpoints=1,
               loc='upper left')
    plt.show()

    return (codebooks_mfcc, codebooks_lpc)
예제 #2
0
def training(nfiltbank, orderLPC):
    """Train MFCC and LPC codebooks for every file in ./train_all_speakers.

    Each file in the directory is treated as one speaker: its MFCC features
    (with the 0th row zeroed) and LPC features are vector-quantised into
    32 centroids with the LBG algorithm.

    :param nfiltbank: number of MFCC feature rows per codebook
    :param orderLPC:  prediction order passed to lpc()
    :return: tuple (codebooks_mfcc, codebooks_lpc) with shapes
             (nSpeaker, nfiltbank, 32) and (nSpeaker, orderLPC, 32)
    """
    # Get directory and list of training files.
    directory = os.getcwd() + '/train_all_speakers'
    # NOTE(review): despite the directory name, nothing filters for *.wav;
    # any non-wav file here will make read() fail.  Also the speaker index
    # depends on os.listdir() order, which is OS-dependent — confirm the
    # matching test code lists the directory the same way.
    wave_files = os.listdir(directory)

    nSpeaker = len(wave_files)
    nCentroid = 32  # original is 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))

    for i, wave_file in enumerate(wave_files):
        fname = '/' + wave_file
        # Fixed: Python 2 print statement -> print() function so this block
        # is valid Python 3, consistent with the rest of the file.
        print('Speaker [' + str(
            i) + ']    File:' + wave_file + '    Training features...')
        (fs, s) = read(directory + fname)
        mel_coeff = mfcc_p(s, fs)
        mel_coeff = mel_coeff.transpose()
        # Zero the 0th coefficient row — presumably the energy coefficient,
        # excluded so it does not dominate the codebook distances (confirm).
        mel_coeff[0, :] = np.zeros(mel_coeff.shape[1])

        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i, :, :] = lbg(lpc_coeff, nCentroid)

    print('Training finished\n')

    return (codebooks_mfcc, codebooks_lpc)
예제 #3
0
def training(nfiltbank, orderLPC):
    """Train per-speaker MFCC and LPC codebooks from train/s<k>.wav.

    For each of the 8 speakers the MFCC and LPC features are vector
    quantised into 16 centroids with the LBG algorithm and a stem plot of
    every codebook is drawn.  Afterwards the MFCC dimensions 4 and 6 of
    speakers 2 and 3, plus their centroids, are shown on a 2-D scatter.

    :param nfiltbank: number of mel filter banks passed to mfcc()
    :param orderLPC:  prediction order passed to lpc()
    :return: tuple (codebooks_mfcc, codebooks_lpc)
    """
    n_speakers = 8
    n_centroids = 16
    codebooks_mfcc = np.empty((n_speakers, nfiltbank, n_centroids))
    codebooks_lpc = np.empty((n_speakers, orderLPC, n_centroids))
    train_dir = os.getcwd() + '/train'

    for spk in range(n_speakers):
        wav_name = '/s' + str(spk + 1) + '.wav'
        print('Now speaker ', str(spk + 1), 'features are being trained')
        (fs, s) = read(train_dir + wav_name)
        codebooks_mfcc[spk, :, :] = lbg(mfcc(s, fs, nfiltbank), n_centroids)
        codebooks_lpc[spk, :, :] = lbg(lpc(s, fs, orderLPC), n_centroids)

        # Stem plots of every centroid of this speaker's two codebooks.
        plt.figure(spk)
        plt.title('Codebook for speaker ' + str(spk + 1) + ' with '
                  + str(n_centroids) + ' centroids')
        for cent in range(n_centroids):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[spk, :, cent])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(codebooks_lpc[spk, :, cent])
            plt.setp(markerline, 'markerfacecolor', 'r')
            plt.setp(baseline, 'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin=-1, ymax=1)
            plt.xlabel('Number of features')

    plt.show()
    print('Training complete')

    # Plot the 5th and 7th MFCC dimensions of speakers 2 and 3 on a 2-D
    # plane together with the centroids of freshly trained codebooks.
    codebooks = np.empty((2, nfiltbank, n_centroids))
    mel_coeff = np.empty((2, nfiltbank, 68))
    for spk in range(2):
        wav_name = '/s' + str(spk + 2) + '.wav'
        (fs, s) = read(train_dir + wav_name)
        mel_coeff[spk, :, :] = mfcc(s, fs, nfiltbank)[:, 0:68]
        codebooks[spk, :, :] = lbg(mel_coeff[spk, :, :], n_centroids)

    plt.figure(n_speakers + 1)
    s1 = plt.scatter(mel_coeff[0, 6, :], mel_coeff[0, 4, :], s=100, color='r', marker='o')
    c1 = plt.scatter(codebooks[0, 6, :], codebooks[0, 4, :], s=100, color='r', marker='+')
    s2 = plt.scatter(mel_coeff[1, 6, :], mel_coeff[1, 4, :], s=100, color='b', marker='o')
    c2 = plt.scatter(codebooks[1, 6, :], codebooks[1, 4, :], s=100, color='b', marker='+')
    plt.grid()
    plt.legend((s1, s2, c1, c2), ('Sp1', 'Sp2', 'Sp1 centroids', 'Sp2 centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()

    return (codebooks_mfcc, codebooks_lpc)
예제 #4
0
def training(nfiltbank, orderLPC):
    """Build MFCC and LPC codebooks for the eight training speakers.

    Each train/s<k>.wav file is converted to MFCC and LPC features, which
    are vector quantised (LBG, 16 centroids) and stem-plotted.  A scatter
    of MFCC dimensions 4 and 6 for speakers 2 and 3 is shown at the end.

    :param nfiltbank: number of mel filter banks passed to mfcc()
    :param orderLPC:  prediction order passed to lpc()
    :return: tuple (codebooks_mfcc, codebooks_lpc)
    """
    speaker_total = 8
    centroid_total = 16
    codebooks_mfcc = np.empty((speaker_total, nfiltbank, centroid_total))
    codebooks_lpc = np.empty((speaker_total, orderLPC, centroid_total))
    base_dir = os.getcwd() + '/train'
    print('Speakers features are being trained.Please wait....')

    for idx in range(speaker_total):
        wav = '/s' + str(idx + 1) + '.wav'
        (fs, s) = read(base_dir + wav)
        mel_feats = mfcc(s, fs, nfiltbank)
        lpc_feats = lpc(s, fs, orderLPC)
        codebooks_mfcc[idx, :, :] = lbg(mel_feats, centroid_total)
        codebooks_lpc[idx, :, :] = lbg(lpc_feats, centroid_total)

        # One figure per speaker with stem plots of all centroids.
        plt.figure(idx)
        plt.title('Codebook for speaker ' + str(idx + 1) + ' with '
                  + str(centroid_total) + ' centroids')
        for cent in range(centroid_total):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[idx, :, cent])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(codebooks_lpc[idx, :, cent])
            plt.setp(markerline, 'markerfacecolor', 'r')
            plt.setp(baseline, 'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin=-1, ymax=1)
            plt.xlabel('Number of features')

    plt.show()
    print('Training complete')

    # 2-D view of the 5th and 7th MFCC dimensions for speakers 2 and 3
    # plus the centroids of freshly trained codebooks (first 68 frames).
    codebooks = np.empty((2, nfiltbank, centroid_total))
    mel_feats = np.empty((2, nfiltbank, 68))
    for idx in range(2):
        wav = '/s' + str(idx + 2) + '.wav'
        (fs, s) = read(base_dir + wav)
        mel_feats[idx, :, :] = mfcc(s, fs, nfiltbank)[:, 0:68]
        codebooks[idx, :, :] = lbg(mel_feats[idx, :, :], centroid_total)

    plt.figure(speaker_total + 1)
    s1 = plt.scatter(mel_feats[0, 6, :], mel_feats[0, 4, :], s=100, color='r', marker='o')
    c1 = plt.scatter(codebooks[0, 6, :], codebooks[0, 4, :], s=100, color='r', marker='+')
    s2 = plt.scatter(mel_feats[1, 6, :], mel_feats[1, 4, :], s=100, color='b', marker='o')
    c2 = plt.scatter(codebooks[1, 6, :], codebooks[1, 4, :], s=100, color='b', marker='+')
    plt.grid()
    plt.legend((s1, s2, c1, c2), ('Sp1', 'Sp2', 'Sp1 centroids', 'Sp2 centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()

    return (codebooks_mfcc, codebooks_lpc)
예제 #5
0
def training(nfiltbank, orderLPC):
    """Train MFCC and LPC codebooks for the four training voices.

    Reads train/s<k>.wav for k = 1..4, quantises MFCC and LPC features
    into 16 centroids each (LBG), stem-plots every codebook, then shows a
    2-D scatter of MFCC dimensions 4 and 6 for two of the voices.

    :param nfiltbank: number of mel filter banks passed to MFCC_Coeff()
    :param orderLPC:  prediction order passed to lpc()
    :return: tuple (cbMfcc, cbLpc) with shapes (4, nfiltbank, 16) and
             (4, orderLPC, 16)
    """
    trainingSet = 4
    nCentroid = 16
    cbMfcc = np.empty((trainingSet, nfiltbank, nCentroid))
    cbLpc = np.empty((trainingSet, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'

    for i in range(trainingSet):
        fname = '/s' + str(i + 1) + '.wav'
        # Fixed: Python 2 print statement -> print() function so this block
        # is valid Python 3 like the rest of the file (same printed output).
        print('Voice ', str(i + 1), 'is trained')
        (fs, s) = read(directory + fname)
        MFCC = MFCC_Coeff(s, fs, nfiltbank)
        lpc_coeff = lpc(s, fs, orderLPC)
        cbMfcc[i, :, :] = lbg(MFCC, nCentroid)
        cbLpc[i, :, :] = lbg(lpc_coeff, nCentroid)

        # Stem plots of every centroid of this voice's two codebooks.
        plt.figure(i)
        plt.title('Codebook for speaker ' + str(i + 1) + ' with ' +
                  str(nCentroid) + ' centroids')
        for j in range(nCentroid):
            plt.subplot(211)
            plt.stem(cbMfcc[i, :, j])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(cbLpc[i, :, j])
            plt.setp(markerline, 'markerfacecolor', 'r')
            plt.setp(baseline, 'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin=-1, ymax=1)
            plt.xlabel('Number of features')
    plt.show()
    # Fixed: Python 2 print statement -> print() function.
    print('Training has been performed ')

    # Scatter of two MFCC dimensions for voices 2 and 3 together with the
    # centroids of freshly trained codebooks (first 68 frames only).
    codebooks = np.empty((2, nfiltbank, nCentroid))
    MFCC = np.empty((2, nfiltbank, 68))

    for i in range(2):
        fname = '/s' + str(i + 2) + '.wav'
        (fs, s) = read(directory + fname)
        MFCC[i, :, :] = MFCC_Coeff(s, fs, nfiltbank)[:, 0:68]
        codebooks[i, :, :] = lbg(MFCC[i, :, :], nCentroid)
    plt.figure(trainingSet + 1)
    s1 = plt.scatter(MFCC[0, 6, :], MFCC[0, 4, :], s=100, color='r', marker='o')
    c1 = plt.scatter(codebooks[0, 6, :], codebooks[0, 4, :], s=100, color='r', marker='+')
    s2 = plt.scatter(MFCC[1, 6, :], MFCC[1, 4, :], s=100, color='b', marker='o')
    c2 = plt.scatter(codebooks[1, 6, :], codebooks[1, 4, :], s=100, color='b', marker='+')
    plt.grid()
    plt.legend((s1, s2, c1, c2), ('Sp1', 'Sp2', 'Sp1 centroids', 'Sp2 centroids'),
               scatterpoints=1, loc='upper left')
    plt.show()

    return (cbMfcc, cbLpc)
예제 #6
0
def training(filtbankN):
    """Train an MFCC codebook for each of the 12 speakers.

    Reads Speaker<k>.wav from the current directory, extracts MFCC
    features and quantises them into 16 centroids with the LBG algorithm.

    :param filtbankN: number of feature rows kept per speaker codebook
    :return: codebooks_org, shape (12, filtbankN, 16)
    """
    n_centroids = 16
    n_speakers = 12
    # One (filtbankN x 16) codebook per speaker.
    codebooks_org = np.empty((n_speakers, filtbankN, n_centroids))

    for spk in range(n_speakers):
        wav_name = 'Speaker' + str(spk + 1) + '.wav'
        (fs, sig) = read(wav_name)  # fs = sample rate, sig = sample array

        # MFCC extraction with winlen=0.025, winstep=0.01, numcep=13,
        # nfilt=40.  The DCT step decorrelates the overlapping filter-bank
        # energies; only the low-order coefficients are kept because the
        # higher ones model fast filter-bank changes that hurt recognition.
        melcoeffs = MFCC(sig, fs, 0.025, 0.01, 13, 40)

        print("---after mfcc---")
        print(np.shape(melcoeffs))
        print()

        # Transpose so features become rows and frames become columns
        # (presumably the layout lbg() expects — confirm).
        melcoeffs = np.transpose(melcoeffs)

        codebooks_org[spk, :, :] = lbg(melcoeffs, n_centroids)

    print('\nTraining of model is complete!')

    return (codebooks_org)
예제 #7
0
def training(nfiltbank):
    """Train a small MFCC codebook (4 centroids) for each of 4 speakers.

    Reads train/s<k>.wav, quantises the MFCC features with the LBG
    algorithm and draws a stem plot of every speaker's codebook.

    :param nfiltbank: number of mel filter banks passed to mfcc()
    :return: codebooks_mfcc, shape (4, nfiltbank, 4)
    """
    n_speakers = 4
    n_centroids = 4
    codebooks_mfcc = np.empty((n_speakers, nfiltbank, n_centroids))
    train_dir = os.getcwd() + '/train'

    for spk in range(n_speakers):
        wav_name = '/s' + str(spk + 1) + '.wav'
        print('Now speaker ', str(spk + 1), 'features are being trained')
        (fs, s) = read(train_dir + wav_name)
        # (Debug option: plt.plot(s[0:1024]) here to inspect the input
        # wave before extracting features.)
        codebooks_mfcc[spk, :, :] = lbg(mfcc(s, fs, nfiltbank), n_centroids)

        # Stem plot of every centroid of this speaker's codebook.
        plt.figure(spk)
        for cent in range(n_centroids):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[spk, :, cent])
            plt.xlabel('Number of features')
            plt.ylabel('MFCC')
            plt.title('Codebook for speaker ' + str(spk + 1))

    plt.show()
    print('Training completed')

    return (codebooks_mfcc)
예제 #8
0
codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))

# Wskazanie katalogu gdzie przechowywane są pliki do trenowania.
directory = os.getcwd() + "/train"

# Pętla wczytania pliku, trenowania i pokazywania wykresu.
for i in range(nSpeaker):
    fname = "/s" + str(i + 1) + ".wav"
    print("Trenowany mówca nr: ", str(i + 1))

    # Wczytanie pliku.
    (fs, s) = read(directory + fname)

    # Zastosowanie metody trenującej.
    mel_coeff = mfcc(s, fs, nfiltbank)
    codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)  ####Joanna changed

    # Tworzenei wykresu.
    plt.figure(i + 1)
    plt.title("Codebook dla mówcy " + str(i + 1) + " z " + str(nCentroid) +
              " centroidami")
    plt.xlabel("Liczba cech")
    ax1 = plt  # dla kilku subplot(2, 1, 1)
    plt.ylabel("MFCC")

    for j in range(nCentroid):
        ax1.stem(
            codebooks_mfcc[i, :, j],
            use_line_collection=True,
            linefmt="C2-.",
            markerfmt="C8o",
예제 #9
0
def training(filtbankN):
    """Train an MFCC codebook per speaker from a fixed train directory.

    Reads Speaker<k>.wav for k = 1..10, extracts MFCC features,
    quantises them into 16 centroids (LBG) and stem-plots each codebook.
    Finally MFCC dimensions 4 and 5 of the first two speakers are
    scattered together with their codebook centroids.

    :param filtbankN: number of feature rows per codebook
    :return: codebooks_org, shape (10, filtbankN, 16)
    """
    Centroids = 16
    SpeakerN = 10
    # NOTE(review): hard-coded absolute Windows path — consider making
    # this a parameter or deriving it from os.getcwd() like the other
    # examples.
    dir_train = 'D:/_python/SpeakerRecognition/3/Data/train'
    codebooks_org = np.empty((SpeakerN, filtbankN, Centroids))

    for i in range(SpeakerN):
        file = '/Speaker' + str(i + 1) + '.wav'
        (fs, sig) = read(dir_train + file)
        print('Training the features of speaker ', str(i + 1))
        melcoeffs = MFCC(sig, fs)
        # Transpose so features are rows and frames are columns.
        melcoeffs = np.transpose(melcoeffs)
        codebooks_org[i, :, :] = lbg(melcoeffs, Centroids)

        plt.figure(i)
        plt.title('Speaker ' + str(i + 1) + ' codeword with ' +
                  str(Centroids) + ' centroids')
        for j in range(Centroids):
            # BUG FIX: was plt.plot(211), which plots the literal value
            # 211 as data; subplot selection was intended (cf. the other
            # examples in this file).
            plt.subplot(211)
            plt.stem(codebooks_org[i, :, j])

    plt.show()
    print('Training of model is complete!')

    # Plot actual codebooks for the first two speakers.
    codebooks = np.empty((2, filtbankN, Centroids))
    melcoeffs = np.empty((2, filtbankN, 68))

    for i in range(2):
        file = '/Speaker' + str(i + 1) + '.wav'
        (fs, sig) = read(dir_train + file)
        temp1 = np.transpose(MFCC(sig, fs))
        melcoeffs[i, :, :] = temp1[:, 0:68]  # first 68 frames only
        codebooks[i, :, :] = lbg(melcoeffs[i, :, :], Centroids)

    plt.figure(SpeakerN + 1)
    # BUG FIX: the original plotted codebooks[0, 4, :] against
    # codebooks[1, 5, :], mixing two different speakers in one point set.
    # Speaker 1's centroids are now plotted against its own dimension 5,
    # mirroring the feature scatter s1.
    c1 = plt.scatter(codebooks[0, 4, :],
                     codebooks[0, 5, :],
                     s=100,
                     color='g',
                     marker='+')
    s1 = plt.scatter(melcoeffs[0, 4, :],
                     melcoeffs[0, 5, :],
                     s=100,
                     color='g',
                     marker='o')
    c2 = plt.scatter(codebooks[1, 4, :],
                     codebooks[1, 5, :],
                     s=100,
                     color='b',
                     marker='+')
    s2 = plt.scatter(melcoeffs[1, 4, :],
                     melcoeffs[1, 5, :],
                     s=100,
                     color='b',
                     marker='o')
    plt.grid()
    plt.legend((s1, s2, c1, c2),
               ('Speak1', 'Speak2', 'Speak1 centroids', 'Speak2 centroids'),
               scatterpoints=1,
               loc='lower right')
    plt.show()

    return (codebooks_org)