Example 1
def training(nfiltbank, orderLPC):

    # Get directory and list of *.wav files
    directory = os.getcwd() + '/train_all_speakers'
    wave_files = [f for f in os.listdir(directory) if f.endswith('.wav')]

    nSpeaker = len(wave_files)
    nCentroid = 32  # original is 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))

    for i, wave_file in enumerate(wave_files):
        fname = '/' + wave_file
        print('Speaker [' + str(i) + ']    File:' + wave_file +
              '    Training features...')
        (fs, s) = read(directory + fname)
        mel_coeff = mfcc_p(s, fs)
        mel_coeff = mel_coeff.transpose()
        mel_coeff[0, :] = np.zeros(mel_coeff.shape[1])

        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i, :, :] = lbg(lpc_coeff, nCentroid)

    print('Training finished\n')

    return (codebooks_mfcc, codebooks_lpc)
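A hypothetical driver for the variant above; the nfiltbank and orderLPC values are only illustrative (Example 3's docstring suggests 13 and 15), and nCentroid is fixed at 32 inside training():

if __name__ == '__main__':
    # assumes a 'train_all_speakers' folder next to the script,
    # containing one .wav file per speaker
    codebooks_mfcc, codebooks_lpc = training(nfiltbank=13, orderLPC=15)
    print(codebooks_mfcc.shape)  # (nSpeaker, 13, 32)
    print(codebooks_lpc.shape)   # (nSpeaker, 15, 32)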
Example 2
def training(nfiltbank, orderLPC):
    nSpeaker = 2
    nCentroid = 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'
    fname = str()

    for i in range(nSpeaker):
        fname = '/speaker' + str(i + 1) + '.wav'
        print('Now speaker ', str(i + 1), 'features are being trained')
        (fs, s) = read(directory + fname)
        # re-read the same file as floating-point samples
        s = wav_to_floats(directory + fname)
        s = s[:48000]
        sd.play(s, fs)
        mel_coeff = mfcc(s, fs, nfiltbank)
        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i, :, :] = lbg(lpc_coeff, nCentroid)

    codebooks = np.empty((2, nfiltbank, nCentroid))
    mel_coeff = np.empty((2, nfiltbank, 68))

    for i in range(2):
        fname = '/speaker' + str(i + 1) + '.wav'
        (fs, s) = read(directory + fname)
        s = s[:48000]
        mel_coeff[i, :, :] = mfcc(s, fs, nfiltbank)[:, 0:68]
        codebooks[i, :, :] = lbg(mel_coeff[i, :, :], nCentroid)

    plt.figure(nSpeaker + 1)
    s1 = plt.scatter(mel_coeff[0, 6, :],
                     mel_coeff[0, 4, :],
                     s=100,
                     color='r',
                     marker='o')
    c1 = plt.scatter(codebooks[0, 6, :],
                     codebooks[0, 4, :],
                     s=100,
                     color='r',
                     marker='+')
    s2 = plt.scatter(mel_coeff[1, 6, :],
                     mel_coeff[1, 4, :],
                     s=100,
                     color='b',
                     marker='o')
    c2 = plt.scatter(codebooks[1, 6, :],
                     codebooks[1, 4, :],
                     s=100,
                     color='b',
                     marker='+')
    plt.grid()
    plt.legend((s1, s2, c1, c2),
               ('Child', 'Parent', 'Child centroids', 'Parent centroids'),
               scatterpoints=1,
               loc='upper left')
    plt.show()

    return (codebooks_mfcc, codebooks_lpc)
Example 3
def training(nfiltbank, orderLPC):
    """Train the speaker recognition model
    using .wav files from the Speaker folder.
    Feature extraction is done in two ways:
    MFCC and LPC

    Parameters
    ----------
    nfiltbank: number of filter banks for mfcc (default 13)
    orderLPC: order of the LPC model (default 15)

    Returns
    -------
    Returns a pair (codebooks_mfcc, codebooks_lpc)
    which are the encoded clusters for trained speakers"""

    # Get directory and list of speaker *.wav files
    original_dir = os.getcwd()
    directory = os.getcwd() + '/static/speakers'

    os.chdir(directory)
    wave_files = filter(os.path.isfile, os.listdir(directory + "/"))
    wave_files = [os.path.join(directory, f)
                  for f in wave_files]  # add path to each file
    wave_files.sort(key=lambda x: os.path.getmtime(x))

    os.chdir(original_dir)

    def cutDirectoryFromName(file):
        # strip the directory prefix, keeping only the bare file name
        if os.name == 'nt':
            for i, c in enumerate(file):
                if file[-(i + 1)] == "\\":
                    return file[-(i):]
        else:
            for i, c in enumerate(file):
                if file[-(i + 1)] == "/":
                    return file[-(i):]

    wave_files = list(map(cutDirectoryFromName, wave_files))

    nSpeaker = len(wave_files)
    nCentroid = 32  # original is 16
    codebooks_mfcc = np.empty((nSpeaker, nfiltbank, nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))

    for i, wave_file in enumerate(wave_files):
        fname = '/' + wave_file
        (fs, s) = read(directory + fname)

        mel_coeff = mfcc(s, fs)
        mel_coeff = mel_coeff.transpose()
        mel_coeff[0, :] = np.zeros(mel_coeff.shape[1])

        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i, :, :] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i, :, :] = lbg(lpc_coeff, nCentroid)

    print('Training finished\n')

    return (codebooks_mfcc, codebooks_lpc)
Example 4
class test():
    def minDistance(features, codebooks):
        speaker = 0
        distmin = np.inf
        for k in range(np.shape(codebooks)[0]):
            D = EUDistance(features, codebooks[k, :, :])
            dist = np.sum(np.min(D, axis=1)) / (np.shape(D)[0])
            if dist < distmin:
                distmin = dist
                speaker = k

        return speaker

    for i in range(nSpeaker):
        fname = '/s' + str(i + 1) + '.wav'
        print('Now speaker ', str(i + 1), 'features are being tested')
        (fs, s) = read(directory + fname)
        mel_coefs = mfcc(s, fs, nfiltbank)
        lpc_coefs = lpc(s, fs, orderLPC)
        sp_mfcc = minDistance(mel_coefs, codebooks_mfcc)
        sp_lpc = minDistance(lpc_coefs, codebooks_lpc)

        if ((sp_mfcc + 1) <= 5):
            b = 'vesa'
        elif ((sp_mfcc + 1) > 5) and ((sp_mfcc + 1) <= 10):
            b = 'belva'
        elif ((sp_mfcc + 1) > 10) and ((sp_mfcc + 1) <= 15):
            b = 'ary'

        print('Speaker ', (i + 1), ' in test matches with speaker ', b,
              (sp_mfcc + 1), ' in train for training with MFCC')
        print('Speaker ', (i + 1), ' in test matches with speaker ',
              (sp_lpc + 1), ' in train for training with LPC')
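minDistance above treats EUDistance(features, codebook) as a matrix with one row per feature frame and one column per centroid (it takes the minimum over axis 1 and averages over the row count). A minimal NumPy sketch consistent with that usage, assuming both arguments store one vector per column, is:

def eu_distance_sketch(features, codebook):
    # features: (d, T) frames, codebook: (d, M) centroids -> (T, M) distances
    diff = features[:, :, None] - codebook[:, None, :]  # broadcast to (d, T, M)
    return np.sqrt((diff ** 2).sum(axis=0))

The EUDistance used by the examples may be implemented differently, but any function with this shape contract works with minDistance.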
Example 5
def training(nfiltbank, orderLPC):
    nSpeaker = 8
    nCentroid = 16
    codebooks_mfcc = np.empty((nSpeaker,nfiltbank,nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'
    fname = str()

    for i in range(nSpeaker):
        fname = '/s' + str(i+1) + '.wav'
        print('Now speaker ', str(i+1), 'features are being trained' )
        (fs,s) = read(directory + fname)
        mel_coeff = mfcc(s, fs, nfiltbank)
        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i,:,:] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i,:,:] = lbg(lpc_coeff, nCentroid)
        
        plt.figure(i)
        plt.title('Codebook for speaker ' + str(i+1) + ' with ' + str(nCentroid) +  ' centroids')
        for j in range(nCentroid):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[i,:,j])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(codebooks_lpc[i,:,j])
            plt.setp(markerline,'markerfacecolor','r')
            plt.setp(baseline,'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin = -1, ymax = 1)
            plt.xlabel('Number of features')
    
    plt.show()
    print('Training complete')
    
    #plotting 5th and 6th dimension MFCC features on a 2D plane
    #comment out this block if you don't want to see the codebook scatter plot
    codebooks = np.empty((2, nfiltbank, nCentroid))
    mel_coeff = np.empty((2, nfiltbank, 68))
   
    for i in range(2):
        fname = '/s' + str(i+2) + '.wav'
        (fs,s) = read(directory + fname)
        mel_coeff[i,:,:] = mfcc(s, fs, nfiltbank)[:,0:68]
        codebooks[i,:,:] = lbg(mel_coeff[i,:,:], nCentroid)
        
    
    plt.figure(nSpeaker + 1)
    s1 = plt.scatter(mel_coeff[0,6,:], mel_coeff[0,4,:],s = 100,  color = 'r', marker = 'o')
    c1 = plt.scatter(codebooks[0,6,:], codebooks[0,4,:], s = 100, color = 'r', marker = '+')
    s2 = plt.scatter(mel_coeff[1,6,:], mel_coeff[1,4,:],s = 100,  color = 'b', marker = 'o')
    c2 = plt.scatter(codebooks[1,6,:], codebooks[1,4,:], s = 100, color = 'b', marker = '+')
    plt.grid()
    plt.legend((s1, s2, c1, c2), ('Sp1','Sp2','Sp1 centroids', 'Sp2 centroids'), scatterpoints = 1, loc = 'upper left')    
    plt.show()
   
    
    return (codebooks_mfcc, codebooks_lpc)
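Every training() variant above relies on lbg(features, nCentroid) returning a (d, nCentroid) codebook for (d, T) feature data. A minimal Linde-Buzo-Gray sketch under that assumption is shown below; it splits each centroid in two and refines by nearest-centroid assignment, so nCentroid must be a power of two (16 and 32 in these examples), and the project's own lbg() may differ in its split rule and stopping test.

def lbg_sketch(features, nCentroid, eps=0.01, n_iter=20):
    # features: (d, T) feature vectors stored column-wise
    codebook = features.mean(axis=1, keepdims=True)  # start from a single centroid
    while codebook.shape[1] < nCentroid:
        # split every centroid into a slightly perturbed pair
        codebook = np.hstack([codebook * (1 + eps), codebook * (1 - eps)])
        for _ in range(n_iter):
            # assign each frame to its nearest centroid (squared Euclidean)
            d2 = ((features[:, :, None] - codebook[:, None, :]) ** 2).sum(axis=0)
            nearest = d2.argmin(axis=1)
            # move each centroid to the mean of the frames assigned to it
            for k in range(codebook.shape[1]):
                if np.any(nearest == k):
                    codebook[:, k] = features[:, nearest == k].mean(axis=1)
    return codebook  # shape (d, nCentroid)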
Example 6
def training(nfiltbank, orderLPC):
    nSpeaker = 8
    nCentroid = 16
    codebooks_mfcc = np.empty((nSpeaker,nfiltbank,nCentroid))
    codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
    directory = os.getcwd() + '/train'
    fname = str()
    print('Speaker features are being trained. Please wait...')
    for i in range(nSpeaker):
        fname = '/s' + str(i+1) + '.wav'
        #print('Now speaker ', str(i+1), 'features are being trained' )
        (fs,s) = read(directory + fname)
        mel_coeff = mfcc(s, fs, nfiltbank)
        lpc_coeff = lpc(s, fs, orderLPC)
        codebooks_mfcc[i,:,:] = lbg(mel_coeff, nCentroid)
        codebooks_lpc[i,:,:] = lbg(lpc_coeff, nCentroid)
        
        plt.figure(i)
        plt.title('Codebook for speaker ' + str(i+1) + ' with ' + str(nCentroid) +  ' centroids')
        for j in range(nCentroid):
            plt.subplot(211)
            plt.stem(codebooks_mfcc[i,:,j])
            plt.ylabel('MFCC')
            plt.subplot(212)
            markerline, stemlines, baseline = plt.stem(codebooks_lpc[i,:,j])
            plt.setp(markerline,'markerfacecolor','r')
            plt.setp(baseline,'color', 'k')
            plt.ylabel('LPC')
            plt.axis(ymin = -1, ymax = 1)
            plt.xlabel('Number of features')
    
    plt.show()
    print('Training complete')
    
    #plotting 5th and 6th dimension MFCC features on a 2D plane
    #comment out this block if you don't want to see the codebook scatter plot
    codebooks = np.empty((2, nfiltbank, nCentroid))
    mel_coeff = np.empty((2, nfiltbank, 68))
   
    for i in range(2):
        fname = '/s' + str(i+2) + '.wav'
        (fs,s) = read(directory + fname)
        mel_coeff[i,:,:] = mfcc(s, fs, nfiltbank)[:,0:68]
        codebooks[i,:,:] = lbg(mel_coeff[i,:,:], nCentroid)
        
    
    plt.figure(nSpeaker + 1)
    s1 = plt.scatter(mel_coeff[0,6,:], mel_coeff[0,4,:],s = 100,  color = 'r', marker = 'o')
    c1 = plt.scatter(codebooks[0,6,:], codebooks[0,4,:], s = 100, color = 'r', marker = '+')
    s2 = plt.scatter(mel_coeff[1,6,:], mel_coeff[1,4,:],s = 100,  color = 'b', marker = 'o')
    c2 = plt.scatter(codebooks[1,6,:], codebooks[1,4,:], s = 100, color = 'b', marker = '+')
    plt.grid()
    plt.legend((s1, s2, c1, c2), ('Sp1','Sp2','Sp1 centroids', 'Sp2 centroids'), scatterpoints = 1, loc = 'upper left')    
    plt.show()
   
    
    return (codebooks_mfcc, codebooks_lpc)
Example 7
def recognize_speaker(codebooks_mfcc, codebooks_lpc, wav):
    """
    Predicts speaker for .wav file using trained clusters

    Parameters
    ----------
    codebooks_mfcc: trained clusters for MFCC
    codebooks_lpc: trained clusters for LPC
    wav: path to the .wav file to classify

    Returns
    -------
    A pair of strings for the speaker prediction of both methods MFCC and LPC

    """
    def minDistance(features, codebooks):
        speaker = 0
        distmin = np.inf
        for k in range(np.shape(codebooks)[0]):
            D = EUDistance(features, codebooks[k, :, :])
            dist = np.sum(np.min(D, axis=1)) / (np.shape(D)[0])
            # print "DISTANCE!!!" + str(dist)
            if dist < distmin:
                distmin = dist
                speaker = k

        return speaker

    # Read the .wav file
    (fs, s) = read(wav)

    # Passing test file to MFCC
    mel_coefs = mfcc(s, fs)
    mel_coefs = mel_coefs.transpose()
    mel_coefs[0, :] = np.zeros(
        mel_coefs.shape[1]
    )  # 0th coefficient does not carry significant information

    # Passing test file to LPC
    lpc_coefs = lpc(s, fs, 15)
    sp_mfcc = minDistance(mel_coefs, codebooks_mfcc)
    sp_lpc = minDistance(lpc_coefs, codebooks_lpc)

    #Find the speaker stored in the textfile
    with open("Speaker_Names.txt") as f:
        content = f.readlines()
    # you may also want to remove whitespace characters like `\n` at the end of each line
    speaker_names = [x.strip() for x in content]

    return ("You are " + speaker_names[sp_mfcc] + '\n',
            "You are " + speaker_names[sp_lpc] + '\n')
Example 8
def minDistance(features, codebooks):
    # pick the codebook (speaker) whose centroids are, on average,
    # closest to the feature vectors
    speaker = 0
    distmin = np.inf
    for k in range(np.shape(codebooks)[0]):
        D = EUDistance(features, codebooks[k, :, :])
        dist = np.sum(np.min(D, axis=1)) / (np.shape(D)[0])
        if dist < distmin:
            distmin = dist
            speaker = k

    return speaker


def test1234():
    # classify the most recently modified .wav file in the speaker folder
    list_of_files = glob.glob('D:/FTP/speaker/*.wav')
    latest_file = max(list_of_files, key=os.path.getmtime)
    stamp = latest_file
    print(stamp)
    (fs, s) = read(stamp)
    mel_coefs = mfcc(s, fs, nfiltbank)
    lpc_coefs = lpc(s, fs, orderLPC)
    sp_mfcc = minDistance(mel_coefs, codebooks_mfcc)
    sp_lpc = minDistance(lpc_coefs, codebooks_lpc)
    b = namaorang(sp_mfcc)
    print('Speaker ', (stamp), ' in test matches with speaker ', b,
          (sp_mfcc + 1), ' in train for training with MFCC')
    return sp_mfcc


nCorrect_MFCC = 0
nCorrect_LPC = 0

for i in range(nSpeaker):
    fname = '/s' + str(i + 1) + '.wav'
    print('Now speaker ', str(i + 1), 'features are being tested')
    (fs, s) = read(directory + fname)
    mel_coefs = mfcc(s, fs, nfiltbank)
    lpc_coefs = lpc(s, fs, orderLPC)
    sp_mfcc = minDistance(mel_coefs, codebooks_mfcc)
    sp_lpc = minDistance(lpc_coefs, codebooks_lpc)

    print('Speaker ', (i + 1), ' in test matches with speaker ', (sp_mfcc + 1),
          ' in train for training with MFCC')
    print('Speaker ', (i + 1), ' in test matches with speaker ', (sp_lpc + 1),
          ' in train for training with LPC')

    if i == sp_mfcc:
        nCorrect_MFCC += 1
    if i == sp_lpc:
        nCorrect_LPC += 1

percentageCorrect_MFCC = (nCorrect_MFCC / nSpeaker) * 100
print('Accuracy of result for training with MFCC is ', percentageCorrect_MFCC,