def MFCC_SVM(audioFileName,model,DEBUG):
    '''
    Predict a per-frame label for an audio file using MFCC features and a
    trained SVM:
    1. divide the audio file into overlapping frames
    2. window each frame (Hanning)
    3. compute MFCC, delta and delta-delta coefficients plus ZCR and F0
    4. classify each completed feature vector with the SVM via framePrediction

    Input:
        audioFileName : path of the input test audio file
        model : trained SVM model, passed through to framePrediction
        DEBUG : when truthy, plot the waveform and the tag/label tracks
    Output:
        label : list of booleans, one per classified frame (p_label[0] > 0)
        tag   : list of SVM prediction values (p_val[0][0]), one per frame
    '''

    fs,audio = read (audioFileName)
    # Normalize amplitude by a fixed scale of 4000
    # (presumably the recorder's full-scale value -- TODO confirm).
    audio = audio/float(4000)
    # frame duration in ms
    frame_length = 25
    # overlap duration in ms
    frame_overlap = 10
    N = len (audio)
    # frame length and overlap converted from ms to samples
    nsample = round(frame_length*fs/1000)
    noverlap = round(frame_overlap*fs/1000)
    # FFT length (twice the frame length)
    NFFT = 2*nsample
    # Hanning window
    window = hann(nsample)
    # hop size between consecutive frame starts
    offset = nsample-noverlap
    # number of full frames that fit in the signal
    max_m = round((N-NFFT)/offset)

    # Mel filter bank with 26 filters spanning 0 .. fs/2 (Nyquist)
    numFilter = 26
    fl = 0*fs
    fh = 0.5*fs
    melFilBank  = util.melFilterBank(numFilter, int(NFFT/2), fs, fl, fh)
    # number of cepstral coefficients kept per frame
    coeffs = 13
    # sliding-window depth for the delta / delta-delta computations
    N1 = 5
    # flag / flag2 latch to 1 once the delta / delta-delta stages have
    # filled their N1-deep windows; count counts frames past stage one
    flag = 0
    count = 0
    flag2 = 0

    frames = int(max_m)

    # Sliding queues: mQ holds raw MFCC frames, dQ holds delta frames;
    # ZCR / F0 hold the matching per-frame scalar features.
    mQ = []
    dQ = []
    tag = []
    ZCR = []
    F0 = []
    label = []
    alpha = 0 # pre-emphasis factor (0, so pre-emphasis is a pass-through here)

    for m in range(0,frames):
        # slice out the m-th overlapping frame
        begin = m*offset
        iend = m*offset + nsample
        frame = audio[begin:iend]
        Frame = preEmphasis(frame,alpha)
        # windowed power spectrum; only the first NFFT/2 bins feed the
        # mel filter bank
        magy = powerSpectrum(Frame,window,NFFT)
        mfccfeature = util.mfccFeature(magy[0:int(NFFT/2)], melFilBank, coeffs)
        # normalize each coefficient by its max absolute value
        mfccfeature =mfccfeature/np.absolute(mfccfeature).max(0)
        mQ.append(mfccfeature)

        # per-frame scalar features: zero-crossing rate and fundamental freq
        zcr_temp = zeroCrossingRate(Frame,window)
        f0 = fundamentalFreq(Frame,window,fs)
        ZCR.append(zcr_temp)
        F0.append(f0)

        # Stage one: once N1 frames are buffered (and on every frame
        # thereafter, via the latched flag) compute delta coefficients
        # over the mQ window and slide all stage-one queues forward.
        if m%N1 == N1-1 or flag == 1:

            MD = util.deltaCoefficients(np.asarray(mQ))
            MD = MD/np.absolute(MD).max(0)
            dQ.append(MD)
            mQ.pop(0)
            ZCR.pop(0)
            F0.pop(0)
            flag = 1

            # Stage two: once N1 delta frames are buffered (and on every
            # frame thereafter, via flag2) compute delta-delta
            # coefficients and emit one SVM prediction.
            if count%N1 == N1-1 or flag2 == 1:

                MDD = util.deltaCoefficients(np.asarray(dQ))
                MDD = MDD/np.absolute(MDD).max(0)
                dQ.pop(0)

                # 13 MFCC + 13 delta + 13 delta-delta = 39 values,
                # then ZCR, F0 and their first differences -> 43 total
                feature =[mQ[0],dQ[2],MDD]
                feature1 = np.reshape(np.vstack(feature),3*coeffs)
                feature2 = feature1.tolist()
                feature2.append(ZCR[0])
                feature2.append(F0[0])
                feature2.append(ZCR[0]-ZCR[1])
                feature2.append(F0[0]-F0[1])
                p_label,p_val = framePrediction(feature2,model)
                tag.append(p_val[0][0])
                label.append(p_label[0]>0)

                flag2 = 1

            count = count+1
    if (DEBUG):
        # Frame-center times. The tag/label tracks start 8 frames late
        # relative to T (presumably 2*(N1-1) frames of delta/delta-delta
        # latency -- TODO confirm), hence the slice starting at index 8.
        T = np.arange(round(nsample/2),N-1-round(nsample/2),(nsample-noverlap))/fs;
        t = np.linspace(0,N,N)/fs;
        L2 = T [8:len(tag)+8];

        # raw waveform
        plt.subplot(3,1,1)
        plt.plot(t,audio)
        plt.xlabel("Time")
        plt.ylabel("Amplitude")
        plt.title(audioFileName)
        plt.xlim([0, T[-1]])

        # raw SVM prediction value per frame
        plt.subplot(3,1,2)
        plt.plot(L2,tag,color = 'r')
        plt.xlabel("Time")
        plt.ylabel("SVM-tag")
        plt.ylim([-1.2,1.2])
        plt.xlim([0, T[-1]])

        # thresholded label per frame
        # NOTE(review): ylabel duplicates the tag subplot above; it was
        # probably meant to read "SVM-label".
        plt.subplot(3,1,3)
        plt.plot(L2,label,color = 'g')
        plt.xlabel("Time")
        plt.ylabel("SVM-tag")
        plt.ylim([-1.2,1.2])
        plt.xlim([0, T[-1]])
        plt.show()

    return label,tag
def MFCCfeatureExtraction(audioFileName,trainingDataFile,label,DEBUG):
    '''
    MFCC feature extraction for training data generation.

    Runs the same frame / MFCC / delta / delta-delta pipeline as MFCC_SVM,
    but instead of classifying, each completed 43-dim feature vector is
    written (with the supplied class label) to trainingDataFile.

    Input:
        audioFileName : path of the input training audio file
        trainingDataFile : destination for feature rows; writing is
            skipped when this is falsy
        label : class label passed to writetoFile with every feature vector
        DEBUG : when truthy, plot waveform plus MFCC / delta / delta-delta
            coefficient tracks
    Output:
        None (features are written to trainingDataFile as a side effect)
    '''

    fs,audio = read (audioFileName)
    # Normalize amplitude by a fixed scale of 4000
    # (presumably the recorder's full-scale value -- TODO confirm).
    audio = audio/float(4000)
    # frame duration in ms
    frame_length = 25
    # overlap duration in ms
    frame_overlap = 10
    N = len (audio)
    # frame length and overlap converted from ms to samples
    nsample = round(frame_length*fs/1000)
    noverlap = round(frame_overlap*fs/1000)
    # FFT length (twice the frame length)
    NFFT = 2*nsample
    # Hanning window
    window = hann(nsample)
    # hop size between consecutive frame starts
    offset = nsample-noverlap
    # number of full frames that fit in the signal
    max_m = round((N-NFFT)/offset)

    # Mel filter bank with 26 filters spanning 0 .. fs/2 (Nyquist)
    numFilter = 26
    fl = 0*fs
    fh = 0.5*fs
    melFilBank  = util.melFilterBank(numFilter, int(NFFT/2), fs, fl, fh)
    # number of cepstral coefficients kept per frame
    coeffs = 13
    # sliding-window depth for the delta / delta-delta computations
    N1 = 5
    # flag / flag2 latch to 1 once the delta / delta-delta stages have
    # filled their N1-deep windows; count counts frames past stage one
    flag = 0
    count = 0
    flag2 = 0

    frames = int(max_m)
    if (DEBUG):
        # per-frame coefficient tracks, recorded only for plotting
        mfccTrack = np.zeros((coeffs,frames))
        deltaTrack = np.zeros((coeffs,frames))
        delta2Track = np.zeros((coeffs,frames))

    # Sliding queues: mQ holds raw MFCC frames, dQ holds delta frames;
    # ZCR / F0 hold the matching per-frame scalar features.
    mQ = []
    dQ = []
    ZCR = []
    F0 = []

    for m in range(0,frames):
        # slice out the m-th overlapping frame (no pre-emphasis here,
        # unlike MFCC_SVM)
        begin = m*offset
        iend = m*offset + nsample
        Frame = audio[begin:iend]
        # windowed power spectrum; only the first NFFT/2 bins feed the
        # mel filter bank
        magy = powerSpectrum(Frame,window,NFFT)
        mfccfeature = util.mfccFeature(magy[0:int(NFFT/2)], melFilBank, coeffs)
        # normalize each coefficient by its max absolute value
        mfccfeature = mfccfeature/np.absolute(mfccfeature).max(0)
        mQ.append(mfccfeature)

        # per-frame scalar features: zero-crossing rate and fundamental freq
        zcr_temp = zeroCrossingRate(Frame,window)
        f0 = fundamentalFreq(Frame,window,fs)
        ZCR.append(zcr_temp)
        F0.append(f0)

        if (DEBUG) :
            mfccTrack[0:coeffs,m] = mfccfeature[0:coeffs]

        # Stage one: once N1 frames are buffered (and on every frame
        # thereafter, via the latched flag) compute delta coefficients
        # over the mQ window and slide all stage-one queues forward.
        if m%N1 == N1-1 or flag == 1:

            MD = util.deltaCoefficients(np.asarray(mQ))
            MD = MD/np.absolute(MD).max(0)
            dQ.append(MD)
            mQ.pop(0)
            ZCR.pop(0)
            F0.pop(0)

            flag = 1

            if (DEBUG) :
                deltaTrack[0:coeffs,m] = MD[0:coeffs]

            # Stage two: once N1 delta frames are buffered (and on every
            # frame thereafter, via flag2) compute delta-delta
            # coefficients and emit one training row.
            if count%N1 == N1-1 or flag2 == 1:

                MDD = util.deltaCoefficients(np.asarray(dQ))
                MDD = MDD/np.absolute(MDD).max(0)
                dQ.pop(0)

                # 13 MFCC + 13 delta + 13 delta-delta = 39 values,
                # then ZCR, F0 and their first differences -> 43 total
                feature =[mQ[0],dQ[2],MDD]
                feature1 = np.reshape(np.vstack(feature),3*coeffs)
                feature2 = feature1.tolist()
                feature2.append(ZCR[0])
                feature2.append(F0[0])
                feature2.append(ZCR[0]-ZCR[1])
                feature2.append(F0[0]-F0[1])
                if (trainingDataFile):
                    writetoFile(trainingDataFile,label,feature2)
                flag2 = 1

                if(DEBUG):
                    delta2Track[0:coeffs,m] = MDD[0:coeffs]
            count = count+1

    if (DEBUG):
        # frame-center times for the coefficient tracks
        T = np.arange(round(nsample/2),N-1-round(nsample/2),(nsample-noverlap))/fs;
        L1 = T [0:int(max_m)];
        t = np.linspace(0,N,N)/fs;

        # raw waveform
        plt.subplot(4,1,1)
        plt.plot(t,audio)
        plt.xlabel("Time")
        plt.ylabel("Amplitude")
        plt.title(audioFileName)
        plt.xlim([0, T[-1]])

        # first three MFCC coefficients over time
        plt.subplot(4,1,2)
        plt.plot(L1,mfccTrack[0,:],color='r')
        plt.plot(L1,mfccTrack[1,:],color='g')
        plt.plot(L1,mfccTrack[2,:],color='b')
        plt.xlim([0, T[-1]])

        # first three delta coefficients over time
        plt.subplot(4,1,3)
        plt.plot(L1,deltaTrack[0,:],color='r')
        plt.plot(L1,deltaTrack[1,:],color='g')
        plt.plot(L1,deltaTrack[2,:],color='b')
        plt.xlim([0, T[-1]])

        # first three delta-delta coefficients over time
        plt.subplot(4,1,4)
        plt.plot(L1,delta2Track[0,:],color='r')
        plt.plot(L1,delta2Track[1,:],color='g')
        plt.plot(L1,delta2Track[2,:],color='b')
        plt.xlim([0, T[-1]])
        plt.show()