Example no. 1
0
def segmentvad(feat, amplitude, dist_1, numfrwin, nsh, pflin, fs):
    """Detect speaker-change points in a feature stream, then linearly cluster.

    Slides two adjacent windows of ``numfrwin`` frames over ``feat`` and scores
    them with a diagonal-Gaussian KL divergence; peaks of the resulting
    distance curve (found by ``peakdet``) are taken as change points.  The
    feature matrix is cut at those points, and adjacent segments whose BIC
    distance (``bicdist_single``) is negative are merged back together
    ("linear clustering").

    Parameters:
        feat      -- 2-D feature matrix, shape (dim, num_frames).
        amplitude -- minimum peak height; smaller maxima are discarded.
        dist_1    -- delta threshold handed to ``peakdet``.
        numfrwin  -- number of frames per comparison window.
        nsh       -- frame shift in seconds (0.010 by default).
        pflin     -- BIC penalty factor for the linear-clustering pass.
        fs        -- sampling frequency in Hz.

    Returns:
        (time_stamp, frame_stamp, ts_lin, fs_lin, clus, cluslin)
        time_stamp / frame_stamp -- change points after segmentation only;
        ts_lin / fs_lin          -- change points surviving linear clustering;
        clus / cluslin           -- feature sub-matrices before / after the
                                    linear-clustering merge.
        If no peak is found, returns ([], [], [], [], [feat], [feat]).
    """

    #Performing Segmentation

    # Indices (in frames) of the two adjacent comparison windows.
    win_ind_1 = 0
    win_ind_2 = win_ind_1 + numfrwin

    dim = len(feat[:, 1])  #to find the dimensions of the FEATURE

    dist = 0
    count = 0

    w1 = np.zeros((dim, numfrwin))
    w2 = np.zeros((dim, numfrwin))

    d = []         # KL distance for each window position
    frame_no = []  # frame index (in samples) associated with each distance

    num_frame = len(feat[1, :])
    # NOTE(review): Nw (30 ms analysis-window length in samples) is computed
    # but never used below.
    Nw = math.floor(fs * 0.03)
    Nsh = math.floor(fs * nsh)  #0.010 by default

    frame_index_w1 = 0
    frame_index_w2 = 0 + numfrwin * Nsh

    while (win_ind_2 + numfrwin < num_frame):
        #finding the KL-DIVERGENCE between W1/W2

        w1[:, 0:numfrwin] = feat[:, win_ind_1:win_ind_1 + numfrwin]
        w2[:, 0:numfrwin] = feat[:, win_ind_2:win_ind_2 + numfrwin]

        cov1 = []
        cov2 = []
        mean1 = []
        mean2 = []

        # Model each window as a Gaussian with diagonal covariance.
        mean1 = np.mean(w1, 1)
        mean2 = np.mean(w2, 1)
        cov1 = np.var(w1, 1)
        cov1 = np.diag(cov1)

        cov2 = np.var(w2, 1)
        cov2 = np.diag(cov2)

        mean1.shape = (1, dim)
        mean2.shape = (1, dim)

        # KL(N1||N2) = 0.5 * ( tr(inv(C2)C1) + (m2-m1)' inv(C2) (m2-m1)
        #                      - dim + ln(det(C2)/det(C1)) )
        dist1 = (np.trace(np.dot(inv(cov2), cov1)))
        dist2 = np.dot((mean2 - mean1), inv(cov2))
        mean1.shape = (dim, 1)
        mean2.shape = (dim, 1)
        dist2 = np.dot(dist2, (mean2 - mean1))
        k = dim
        dist3 = (np.linalg.det(cov2) / np.linalg.det(cov1))
        dist3 = np.log(dist3)
        dist = 0.5 * (dist1 + dist2 - k + dist3)

        d.append(dist)

        frame_no.append(frame_index_w2)
        win_ind_1 = win_ind_1 + 1
        win_ind_2 = win_ind_2 + 1
        frame_index_w2 = win_ind_2 * Nsh
        # NOTE(review): frame_index_w3 is never read; this line probably
        # intended to update frame_index_w1 -- confirm against the original.
        frame_index_w3 = win_ind_1 * Nsh

    # Flatten the collected distances / frame indices into plain lists.
    d = np.array(d)
    d.shape = (len(d), )
    d = d.tolist()
    frame_no = np.array(frame_no)
    time_stamps = frame_no / fs
    frame_no.shape = (len(frame_no), )
    time_stamps.shape = (len(frame_no), )
    frame_no = frame_no.tolist()
    time_stamps = time_stamps.tolist()

    # Pad the front with zeros so the distance curve aligns with frame 0.
    d1 = np.zeros((numfrwin, ))
    d1 = d1.tolist()
    d1.extend(d)

    #Finding the Peaks to Identify the Change points
    b = []
    max1, min2 = peakdet(
        d1, dist_1)  #max1 gives maxima peaks;min2 gives minima peaks
    temp = []
    temp.append(feat)
    siz = max1.size
    if (siz == 0
        ):  #If no change point found, return the entire feat file as it is
        return b, b, b, b, temp, temp

    time_stamp = []

    # Keep only peaks above the amplitude threshold; convert them to seconds.
    for i in range(0, len(max1[:, 0])):
        if (max1[i, 1] < amplitude):
            max1[i, 1] = 0
        else:
            time_stamp.append(max1[i, 0] * (Nsh) / fs)

    frame_stamp = max1[:, 0]
    frame_stamp = frame_stamp.tolist()

    clus = []
    i = 0

    lasfram = len(feat[1, :])

    #segmenting the features depending on change points
    Nen = int(frame_stamp[0])
    Nst = 0
    clus.append(feat[:, Nst:Nen])

    while (i < len(time_stamp) - 1):

        Nst = int(frame_stamp[i])
        Nen = int(frame_stamp[i + 1])
        clus.append(feat[:, Nst:Nen])

        i = i + 1

    # Tail segment: from the last change point to the end of the stream.
    Nst = int(frame_stamp[len(frame_stamp) - 1])
    Nen = lasfram
    clus.append(feat[:, Nst:Nen])

    counlin = 1
    cluslin = []
    cov_lin = []
    mfcc_lin = []
    clus2 = clus
    ts_lin = []
    fs_lin = []

    #performing linear clustering
    # Walk the segments left to right; merge each one into its predecessor
    # when their BIC distance is negative, otherwise keep the change point.
    while (counlin < len(clus2)):

        if (counlin <= 1):
            bicdist = bicdist_single(clus2[counlin], clus2[counlin - 1], pflin)
            if (bicdist < 0):
                clus3 = np.concatenate((clus2[counlin], clus2[counlin - 1]),
                                       axis=1)

                cluslin.append(clus3)

            else:
                cluslin.append(clus2[counlin - 1])
                cluslin.append(clus2[counlin])
                ts_lin.append(time_stamp[counlin - 1])
                fs_lin.append(frame_stamp[counlin - 1])
        else:
            bicdist = bicdist_single(clus2[counlin], cluslin[len(cluslin) - 1],
                                     pflin)
            if (bicdist < 0):

                clus3 = np.concatenate(
                    (cluslin[len(cluslin) - 1], clus2[counlin]), axis=1)
                cluslin[len(cluslin) - 1] = clus3

            else:

                cluslin.append(clus2[counlin])
                ts_lin.append(time_stamp[counlin - 1])
                fs_lin.append(frame_stamp[counlin - 1])
        counlin = counlin + 1

    return time_stamp, frame_stamp, ts_lin, fs_lin, clus, cluslin
Example no. 2
0
def clus_vad1_spk(cluslin, pfo, tag, spkrs, verbose):
    """Agglomeratively merge clusters until exactly ``spkrs`` remain.

    Repeatedly finds the pair of clusters with the smallest BIC distance
    (``bicdist_single``) and concatenates them, until only ``spkrs``
    clusters are left.  Pairwise distances are memoised in a dictionary
    keyed by ``'i_j'`` and re-indexed after every merge so unchanged pairs
    are never recomputed.

    Parameters:
        cluslin -- list of 2-D feature matrices (dim, frames), one per
                   cluster (output of segmentation + linear clustering).
        pfo     -- BIC penalty factor forwarded to ``bicdist_single``.
        tag     -- recording tag; kept for interface compatibility (unused).
        spkrs   -- target number of clusters (known speaker count).
        verbose -- when 1, print per-iteration merge diagnostics.

    Returns:
        list of 2-D feature matrices -- the remaining clusters.
    """
    clus1 = cluslin

    print('After VAD+Segmentation+Linear Clustering')
    print(str(len(cluslin)) + ' Clusters ')
    count1 = 0
    bicact = []     # history of the minimum BIC value at each merge
    dist_dict = {}  # cache: 'i_j' -> BIC distance between clusters i and j

    #Performing Agglomerative Clustering
    #Using Dictionaries to store Values of BIC Values calculated in the previous iteration, so that dont need to recompute same values.

    while len(clus1) > spkrs:
        i_ind = []
        j_ind = []
        dist = []

        if count1 == 0:
            clus1 = cluslin
        else:
            # Use the merged cluster list and the re-indexed distance cache
            # produced by the previous iteration.
            clus1 = clusnew
            dist_dict = dictnew

        # Gather (cached or freshly computed) distances for every pair i < j.
        for i in range(0, len(clus1) - 1):
            for j in range(i + 1, len(clus1)):
                indexa = str(i) + '_' + str(j)
                if indexa not in dist_dict:
                    dist_dict[indexa] = bicdist_single(clus1[i], clus1[j], pfo)
                dist.append(dist_dict[indexa])
                i_ind.append(i)
                j_ind.append(j)

        if len(dist) == 0:
            # Fewer than two clusters left; nothing to merge.
            break

        bic = min(dist)
        if verbose == 1:
            print('BIC SCORE: ' + str(bic))
        bicact.append(bic)
        c1 = dist.index(bic)
        el1 = i_ind[c1]
        el2 = j_ind[c1]  #Merge segments :el1 and el2 (el1 < el2 by construction)
        if verbose == 1:
            print('ELEMENTS MERGED ARE :' + str(el1) + ' and ' + str(el2))

        clusnew = []
        dictnew = {}

        clus3 = np.concatenate((clus1[el1], clus1[el2]), axis=1)
        if verbose == 1:
            print('Shape Of Merged Elements ' + str(clus3.shape))

        # Keep every cluster except the two merged ones, then append the merge
        # at the end (its distances are recomputed on the next iteration).
        for k in range(0, len(clus1)):
            if k != el1 and k != el2:
                clusnew.append(clus1[k])
        clusnew.append(clus3)

        #now finding new distance dictionary
        # Removing el1 and el2 shifts every index above el1 down by 1 and
        # every index above el2 down by 2; pairs touching el1/el2 are dropped.
        for m in range(0, len(clus1) - 1):
            for n in range(m + 1, len(clus1)):
                indexa = str(m) + '_' + str(n)
                con = not ((m == el1) or (m == el2) or (n == el1) or
                           (n == el2))
                if (con):

                    if ((m < el1) and (n < el1)):
                        str1 = str(m) + '_' + str(n)
                        dictnew.update({str1: dist_dict[indexa]})
                    elif ((m < el1) and (n > el1) and (n < el2)):
                        str1 = str(m) + '_' + str(n - 1)
                        dictnew.update({str1: dist_dict[indexa]})
                    elif ((m < el1) and (n > el2)):
                        str1 = str(m) + '_' + str(n - 2)
                        dictnew.update({str1: dist_dict[indexa]})
                    elif ((m > el1) and (n < el2)):
                        str1 = str(m - 1) + '_' + str(n - 1)
                        dictnew.update({str1: dist_dict[indexa]})
                    elif ((m > el1) and (n > el2) and (m < el2)):
                        str1 = str(m - 1) + '_' + str(n - 2)
                        dictnew.update({str1: dist_dict[indexa]})
                    elif ((m > el2) and (n > el2)):
                        str1 = str(m - 2) + '_' + str(n - 2)
                        dictnew.update({str1: dist_dict[indexa]})
                    else:
                        print('error in indexing ')
                        print(
                            str(el1) + ' ' + str(el2) + ' m/n=> ' + str(m) +
                            ' ' + str(n))
                        print(con)

        count1 = count1 + 1
        if verbose == 1:
            print('Number of Clusters Remaining ' + str(len(clus1)))

    return clus1
Example no. 3
0
def clusterDIAR(wav_file,feat_file,pfo,pflin,tag,numfrwin,nsh,spkrs,amplitude,dist_1,verbose):
    """Full diarization pipeline without a VAD front end.

    Runs ``segment`` on the recording, cuts the features at the detected
    change points, performs linear clustering, then agglomerative
    clustering (BIC-stopped when ``spkrs == 'None'``, otherwise down to
    ``int(spkrs)`` clusters), assigns each linear cluster to its nearest
    final cluster, and writes an RTTM-style result file under
    ``./results/`` (the directory must already exist).

    Parameters:
        wav_file  -- path to the input wav file (forwarded to ``segment``).
        feat_file -- path to an external feature file, or sentinel value.
        pfo       -- BIC penalty factor for agglomerative clustering.
        pflin     -- BIC penalty factor for linear clustering.
        tag       -- recording tag used in the RTTM lines and file name.
        numfrwin  -- segmentation window size in frames.
        nsh       -- frame shift in seconds.
        spkrs     -- 'None' for BIC-based stopping, else the speaker count.
        amplitude -- peak-height threshold for change-point detection.
        dist_1    -- peak-delta threshold for change-point detection.
        verbose   -- when 1, print per-iteration merge diagnostics.

    Returns:
        2-D array of shape (num_change_points, 2): column 0 = change-point
        time (s), column 1 = speaker index before the change.
    """

  
    time_stamp,frame_stamp,feat = segment(wav_file,feat_file,amplitude,dist_1,numfrwin,nsh)
    biclin=[]
    i=0
    Nst=0
    Nen=0
    clus=[]
    cov_mat=[]
   


    # Cut the feature matrix at the detected change points.
    while(i<len(time_stamp)-1):
        if(i==0):
          Nen=int(frame_stamp[i])
          Nst=0
          clus.append(feat[:,Nst:Nen])
          Nst=int(frame_stamp[i])
          Nen=int(frame_stamp[i+1])
          clus.append(feat[:,Nst:Nen])
                  
        else:
          Nst=int(frame_stamp[i])
          Nen=int(frame_stamp[i+1])
          clus.append(feat[:,Nst:Nen])
        
        
        i=i+1




    #Linear CLustering Done Here
    # Merge each segment into its predecessor when their BIC distance is
    # negative; segments shorter than 10 frames are always absorbed.
    counlin=1
    cluslin=[]
    cov_lin=[]
    mfcc_lin=[]
    clus2=clus
    cov_mat2=cov_mat
    ts_lin=[]

    while(counlin<len(clus2)):

        if(counlin<=1):

            bicdist=bicdist_single(clus2[counlin],clus2[counlin-1],pflin)
            biclin.append(bicdist)
            if(bicdist<0):
                clus3=np.concatenate((clus2[counlin],clus2[counlin-1]),axis=1)
                
                cluslin.append(clus3)
                
            else:
                cluslin.append(clus2[counlin-1])
                cluslin.append(clus2[counlin])
                ts_lin.append(time_stamp[counlin-1])
        else:

             bicdist=bicdist_single(clus2[counlin],cluslin[len(cluslin)-1],pflin)
             biclin.append(bicdist)
             if(bicdist<0):

                 
                clus3=np.concatenate((cluslin[len(cluslin)-1],clus2[counlin]),axis=1)
                cluslin[len(cluslin)-1]=clus3
             
                
                
             else:
                x,y=clus2[counlin].shape
                # Too-short segments (<= 10 frames) are merged regardless of BIC.
                if(y>10):
                    
                    cluslin.append(clus2[counlin])
                    ts_lin.append(time_stamp[counlin-1])
                else:
                     
                    clus3=np.concatenate((cluslin[len(cluslin)-1],clus2[counlin]),axis=1)
                    cluslin[len(cluslin)-1]=clus3
                    
                                    
        counlin=counlin+1     



    count=0
    i_ind=[]
    j_ind=[]
    dist=[]
    covmat_new=[]
    bic=-0.01
    clus1=cluslin
  
   
   
    print('original cluster:')
    print(len(clus))
    print('after linear clustering')
    print(len(cluslin))
    count1=0
    bicact=[]

    tslin=[]
    dist_dict={}

    # Agglomerative clustering, two stopping rules:
    #   spkrs == 'None' -> stop when the minimum BIC turns non-negative;
    #   otherwise       -> stop when int(spkrs) clusters remain.
    if(spkrs=='None'):
        
        # NOTE(review): '&' here is a bitwise AND of Python bools; it works
        # because both operands are bool, but 'and' would be idiomatic.
        while((len(clus1)>1)&(bic<0)):
            i_ind=[]
            j_ind=[]
            dist=[]
            
            if(count1==0):
                clus1=cluslin

            else:
                clus1=clusnew
                dist_dict=dictnew


            # Gather cached / fresh pairwise BIC distances for all i < j.
            for i in range(0,len(clus1)-1):
                for j in range(i+1,len(clus1)):
                    
                    indexa=str(i)+'_'+str(j)
                    if indexa in dist_dict:
                        dist.append(dist_dict[indexa])
                    else:
                       
                        jsdist=bicdist_single(clus1[i],clus1[j],pfo)
                        
                        dist_dict.update({indexa:jsdist})
                        dist.append(dist_dict[indexa])

                    i_ind.append(i)
                    j_ind.append(j)
                    
                count=count+1     
            if(len(dist)==0):
                break

            bic=min(dist)
            if(verbose==1):
                print("BIC SCORE IS "+str(bic))
            bicact.append(bic)
            c1=dist.index(bic)
            el1=i_ind[c1]
            el2=j_ind[c1]
            if(verbose==1):
                print("ELEMENTS MERGED ARE "+str(el1)+" and "+str(el2))
            clusnew=[]
          
            dictnew={}
            
            clus3=np.concatenate((clus1[el1],clus1[el2]),axis=1)
            if(verbose==1):
                print("Shape of merged element "+str(clus3.shape))
          
            # Keep all clusters except the two merged ones; append the merge.
            for k in range(0,len(clus1)):
              if((k!=el1)&(k!=el2)):
                  clusnew.append(clus1[k])
     
            clusnew.append(clus3)
        
            #now finding new distance dictionary
            # Re-index cached distances: indices above el1 shift down by 1,
            # indices above el2 shift down by 2; pairs touching el1/el2 drop.
            flagm=0
            flagn=0
            for m in range(0,len(clus1)-1):
                for n in range(m+1,len(clus1)):
                    indexa=str(m)+'_'+str(n)
                    con=not ((m==el1)or(m==el2)or(n==el1)or(n==el2))
                    if( con):
                       
                        if((m<el1)and(n<el1)):
                            str1=str(m)+'_'+str(n)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m<el1)and(n>el1)and(n<el2)):
                            str1=str(m)+'_'+str(n-1)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m<el1)and(n>el2)):
                            str1=str(m)+'_'+str(n-2)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m>el1)and(n<el2)):
                            str1=str(m-1)+'_'+str(n-1)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m>el1)and(n>el2)and(m<el2)):
                            str1=str(m-1)+'_'+str(n-2)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m>el2)and(n>el2)):
                            str1=str(m-2)+'_'+str(n-2)
                            dictnew.update({str1:dist_dict[indexa]})
                        else:
                            print('error in indexing ')
                            print(str(el1)+' '+str(el2)+' m/n=> '+str(m)+' '+str(n))
                            print(con)
                    
                       
            
                            


            count1=count1+1

    else:
        nspkrs=int(spkrs)
        # Same agglomerative loop, but stop at a fixed cluster count.
        while((len(clus1)>nspkrs)):
            i_ind=[]
            j_ind=[]
            dist=[]
            
            if(count1==0):
                clus1=cluslin

            else:
                clus1=clusnew
                dist_dict=dictnew


            for i in range(0,len(clus1)-1):
                for j in range(i+1,len(clus1)):
                    
                    indexa=str(i)+'_'+str(j)
                    if indexa in dist_dict:
                        dist.append(dist_dict[indexa])
                    else:
                       
                        jsdist=bicdist_single(clus1[i],clus1[j],pfo)
                        
                        dist_dict.update({indexa:jsdist})
                        dist.append(dist_dict[indexa])

                    i_ind.append(i)
                    j_ind.append(j)
                    
                count=count+1     
            if(len(dist)==0):
                break

            bic=min(dist)
            if(verbose==1):
                print("BIC VALUE IS "+str(bic))
            bicact.append(bic)
            c1=dist.index(bic)
            el1=i_ind[c1]
            el2=j_ind[c1]
            if(verbose==1):
                print("ELEMENTS MERGED ARE "+str(el1)+" and "+str(el2))
            clusnew=[]
          
            dictnew={}
            
            clus3=np.concatenate((clus1[el1],clus1[el2]),axis=1)
            if(verbose==1):
                print("Shape of merged element "+str(clus3.shape))
          
            for k in range(0,len(clus1)):
              if((k!=el1)&(k!=el2)):
                  clusnew.append(clus1[k])
     
            clusnew.append(clus3)
        
            #now finding new distance dictionary
            flagm=0
            flagn=0
            for m in range(0,len(clus1)-1):
                for n in range(m+1,len(clus1)):
                    indexa=str(m)+'_'+str(n)
                    con=not ((m==el1)or(m==el2)or(n==el1)or(n==el2))
                    if( con):
                       
                        if((m<el1)and(n<el1)):
                            str1=str(m)+'_'+str(n)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m<el1)and(n>el1)and(n<el2)):
                            str1=str(m)+'_'+str(n-1)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m<el1)and(n>el2)):
                            str1=str(m)+'_'+str(n-2)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m>el1)and(n<el2)):
                            str1=str(m-1)+'_'+str(n-1)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m>el1)and(n>el2)and(m<el2)):
                            str1=str(m-1)+'_'+str(n-2)
                            dictnew.update({str1:dist_dict[indexa]})
                        elif((m>el2)and(n>el2)):
                            str1=str(m-2)+'_'+str(n-2)
                            dictnew.update({str1:dist_dict[indexa]})
                        else:
                            print('error in indexing ')
                            print(str(el1)+' '+str(el2)+' m/n=> '+str(m)+' '+str(n))
                            print(con)
                    
                       
            
                            


            count1=count1+1
        

    print(len(clus1))
    # Assign each linear cluster to the closest (minimum-BIC) final cluster.
    flag_pt=[] 
    for i in range(0,len(cluslin)):
        kld=[]
        for j in range(0,len(clus1)): #all the clusters(small no)
           
            kld.append(bicdist_single(cluslin[i],clus1[j],pfo))
       
        klin=min(kld)
        index=kld.index(klin)
        flag_pt.append(index)
       

    # Collapse consecutive identical speaker labels into change points.
    ts=[]

    ind=[]

    for k in range(0,len(flag_pt)-1):
        if(flag_pt[k]!=flag_pt[k+1]):
            
            ts.append(ts_lin[k])
            ind.append(flag_pt[k])

    dat=np.zeros((len(ts),2))
    # NOTE(review): dat1 is allocated but never filled or returned.
    dat1=np.zeros((len(flag_pt),2))
    dat[:,0]=ts
    dat[:,1]=ind
   
    # Write the result as RTTM lines: SPEAKER <tag> 1 <start> <dur> ...
    pf1=pfo
    str12='./results/'+tag+'_'+str(spkrs)+'_'+str(pfo)+'_'+str(pflin)+'.txt'
    text_file = open(str12, "w")
    for i in range(0,len(ts)):
        
        if(i==0):
            start=0
            end=ts[i]
            dur=end-start
        else:
            start=ts[i-1]+0.01
            end=ts[i]
            dur=end-start
        spkid=ind[i]
        str1='SPEAKER '+ tag+' 1 '+str(start)+' '+str(dur)+' <NA> <NA> '+str(spkid)+' <NA> <NA>'
        text_file.write(str1+"\n")
        

    text_file.close()
    return dat
def diar_vad(wav_file,feat_file,pfo,pflin,tag,numfrwin,nsh,MDT,vadtxt,spkrs,filetype,verbose,feattype):
    """VAD-driven diarization pipeline: features -> VAD -> segmentation ->
    linear clustering -> agglomerative clustering -> output file.

    Parameters:
        wav_file  -- path to the input wav file.
        feat_file -- 'NoneProvided', a CSV feature path, or (when
                     feattype=='numpy') the feature matrix itself.
        pfo       -- BIC penalty factor for agglomerative clustering.
        pflin     -- BIC penalty factor for linear clustering.
        tag       -- recording tag used in output lines and the file name.
        numfrwin  -- segmentation window size in frames.
        nsh       -- frame shift in seconds (1 means "not provided").
        MDT       -- minimum duration time for VAD smoothing (seconds).
        vadtxt    -- '1' to run webrtcvad, else a path to a VAD text file.
        spkrs     -- 'None' for BIC-stopped clustering, else speaker count.
        filetype  -- 'rttm' for RTTM output, anything else for frame lines.
        verbose   -- when 1, print clustering diagnostics.
        feattype  -- 'csv' or 'numpy'; how to interpret feat_file.

    Returns:
        (data_time, data_frame, data_perfrm, datafrm_sil, datatim_sil):
        per-segment [spkid, start, end] in seconds and frames, a per-frame
        label list (speaker id or 'SIL'), and the same segment tables with
        explicit 'SIL' rows inserted between speech segments.
    """


    #Making a webrtcvad Object. For more information , Visit: https://github.com/wiseman/py-webrtcvad
    vad = webrtcvad.Vad()
    vad.set_mode(0) #Setting Mode 0 of 'Aggresiveness'. Visit: https://github.com/wiseman/py-webrtcvad

    x, fs = sf.read(wav_file)
    #Reading The wav File. x=array of samples of the wav file(decimal). fs= sampling frequency


    #Feature Extraction




    if(feattype=='csv'):
        
        if(feat_file=='NoneProvided'):

            #If no external Features Provided by the User, System will extract the feature using python_speech_features Library.
            #More information on the Library : https://github.com/jameslyons/python_speech_features
            #x=wav file(array of decimals)
            #fs=sampling Frequency
            #0.03=Window Size in seconds ; AKA 30mS
            #0.01=Window Shift in seconds ; AKA 10ms
            #13= Dimension of MFCC Feature
            
            feat = mfcc(x,fs,0.03,0.010,13)
            feat= feat.transpose()#feat-->FEATURE VARIABLE. Shape=(dimension, samples)
            
            nsh=0.010 #nsh Variable--> Indiactes window shift interval
            print('using Inbuilt MFFCs as features')
        else:
            feat=getcsvfeat(feat_file) #Gets the features from the CSV File path which is provided
            #feat-->FEATURE VARIABLE. Shape=(dimension, samples)                            

            #When CSV File is provided, but the Window SHift AKA nsh is not provided (nsh==1 by default(see DIAR_MAIN.py)), throw an ERROR
            
            if(nsh==1):
                 print('ERROR, please enter -res (Window Shift) as Features are provided')
                 sys.exit()
            print('Using provided Features')
    
    elif(feattype=='numpy'):
        # Caller passed the feature matrix directly.
        feat=feat_file
    else:
        print('ERROR, please ENTER Feature options correctly . (feattype)-->indicate numpy or csv')
        sys.exit()
        
        
    
        

    #get the Frame By Frame VAD Output(For each overlapping frame, do VAD and get '1==speech' or '0==no Speech' For each overlapping frame

    if(vadtxt=='1'):
        vad_flag,feat_1,numfram=vadfn(x,nsh,fs,feat)
        print('Performing VAD')
    #vadtxt-->Filename containing VAD Information.If no File Name provided, vadtxt=1 by default. If no file given, perform VAD.
    #feat---> feature file  (Dim,sample)
    #vadfn --> performs VAD using the webrtcvad Library
    #vad_flag--> Frame by Frame VAD info for each overlapping frame.Eg. vad_flag= array[1 0 0 0 1 1 1 0 1 0 1 1 1 0 0.......]
    #numfram-->Number of samples(feature samples) taken into account. numfram= min(number of VAD samples, Number of Feature Samples). Done to avoid Mismatch
    #feat_1-->Returns features with 'numfram' samples . len(feat_1)=numfram
                                        
                                                        
        
    else:
        vad_flag,feat_1,numfram=readVADFile(vadtxt,feat)
    #vadtxt-->Filename containing VAD Information.
    #vad_flag--> Frame by Frame VAD info for each overlapping frame.Eg. vad_flag= array[1 0 0 0 1 1 1 0 1 0 1 1 1 0 0.......]
    #numfram-->Number of samples(feature samples) taken into account. numfram= min(number of VAD samples, Number of Feature Samples). Done to avoid Mismatch
    #feat_1-->Returns features with 'numfram' samples .len(feat_1)=numfram
    print('Number of VAD Samples : '+str(len(vad_flag)))
    print('Number of Extracted Feature Samples : '+str(len(feat[1,:])))
    print('Number of Samples Considered : '+str(numfram))                                    

    speech_seg,speech_seg_start,speech_seg_end=getSpeechSEgments(vad_flag,MDT,feat_1,numfram)
    #getSpeechSEgments--> Analyses the VAD information and gets only the SPEECH Segments.
    #MDT-->Minimum Duration Time. Used for smoothing out the VAD output. if Silence Time<MDT----> Treat as Voice

    #speech_seg--> Returns Speech Segment windows.
        #Each Segment --> Features of the 'Voiced Part'.
        #eg. ---> speech_seg[0].shape=(13,4562) ; speech_seg[1].shape=(13,2341). speech_seg[i]-->returns 'ith' Speech Segment
    #speech_seg_start-->Start Time of Each Speech Segment
    #speech_seg_end-->End Time of Each Speech Segment


    clus_final=[]#Will contain the various speech segments AFTER Segmentation+Linear Clustering
    frms_start=[]#Start Frame of each speech segment after Segmentation+Linear Clustering
    frms_end=[]#End Frame of each speech segment after Segmentation+Linear Clustering

    #Performing Segmentation + Linear Clustering in Each of the Speech Segments
    for u in range(0,len(speech_seg)):
        
        x,y=speech_seg[u].shape
        #y--->Number of Samples in 'speech_seg[u]'
        
        if(y<(numfrwin+30)):#If Number of samples<numfrwin+30, DONT do segmentation+Linear CLustering. Add the entire speech segment. eg., if numfram=100,and nsh=0.010; 130*0.010=1.3s. If segment size less than 1.3s, add entire segment as it is
            
            clus_final.append(speech_seg[u])
            frms_start.append(speech_seg_start[u])
            frms_end.append(speech_seg_end[u])
        else:
            time_stamp,frame_stamp,ts_lin,fs_lin,clus,cluslin=segmentvad( speech_seg[u],1,4.2,numfrwin,nsh,pflin,fs)
            #segmentvad-->Does Segmeatation followed by linear clustering on the speech segments.

            #Outputs
            #time_stamp-->Time Stamp of change points followed by ONLY Segmentation
            #frame_stamp-->Frame Stamp of change points followed by ONLY Seg,Segmentation
            #ts_lin-->Time Stamp of change points followed by Segmentation AND Linear Clustering
            #fs_lin-->Frame Stamp of change points followed by Segmentation AND Linear Clustering
            #clus-->Speech Clusters followed by ONLY Segmentation
            #cluslin-->Speech Clusters followed by Segmentation AND Linear Clustering

            #Inputs
            #speech_seg[i]--->ith speech segment
            #1-->Amplitude Threshold for Peak Detection ( See documentation of peakdetect.py for more information)
            #4.2-->Distance Threshold for Peak Detection ( See documentation of peakdetect.py for more information)
            #numfrwin--> Segmentation Window Size. eg. numfrwin=100;nsh=0.010 ;Segmentation Window= 100*0.010=1s
            #nsh--> frame shift (0.010 default)
            #pflin--> Penalty Factor For Linear CLustering.(in BIC formula)
            #fs-->sampling frequency
            
            
            
            for t in range(0,len(cluslin)):
                clus_final.append(cluslin[t])

            #calculating Frame Start and Frame End for each 'segmented+Linear clustered' speech segment
            # Change-point frame stamps are relative to the segment, so every
            # start/end is offset by speech_seg_start[u].
            if(len(cluslin)==1):
                frms_start.append(speech_seg_start[u])
                frms_end.append(speech_seg_end[u])
                
            else:
                
            
                nst=0
                nen=fs_lin[0]
                frms_start.append(nst+speech_seg_start[u])
                frms_end.append(nen+speech_seg_start[u])
                

                if(len(fs_lin)==1):
                    nst=fs_lin[0]
                    nen=speech_seg_end[u]-speech_seg_start[u]
                    frms_start.append(nst+speech_seg_start[u])
                    frms_end.append(nen+speech_seg_start[u])
                else:
                    for y in range(1,len(fs_lin)):
                        nst=fs_lin[y-1]
                        nen=fs_lin[y]
                        frms_start.append(nst+speech_seg_start[u])
                        frms_end.append(nen+speech_seg_start[u])
                    nst=fs_lin[len(fs_lin)-1]
                    nen=speech_seg_end[u]-speech_seg_start[u]
                    frms_start.append(nst+speech_seg_start[u])
                    frms_end.append(nen+speech_seg_start[u])
                    
                
  

      #Performing Hierarchical Clustering on the Speech Signals
      #Two ways of terminating Clustering
    if(spkrs=='None'):
        clusters_spkrs=clus_vad1(clus_final,pfo,tag,verbose) #End Point of Clustering is based on BIC Value; If delta(BIC)>0; Stop Merging.
        #clusters_spkrs----> The Final CLusters of the speakers AFTER Hierarchical CLustering
    else:
        clusters_spkrs=clus_vad1_spk(clus_final,pfo,tag,int(float(spkrs)),verbose)#End Point of Clustering ---> Number of clusters= Number of speakers
        #clusters_spkrs----> The Final CLusters of the speakers AFTER Hierarchical CLustering
            
    for k in range(len(clusters_spkrs)):
        print(clusters_spkrs[k].shape)

        
    flag_pt=[]
    #Doing Speaker Matching; comparing each speech segment with the speaker clusters and assign speaker ID
    for i in range(0,len(clus_final)):
        kld=[]
        for j in range(0,len(clusters_spkrs)): #all the spkr clusters
       
            kld.append(bicdist_single(clus_final[i],clusters_spkrs[j],pfo))
   
        klin=min(kld)
        index=kld.index(klin)
        flag_pt.append(index)#flag_pt contains speaker ID
       


    #Write to File
    pf1=pfo
    str12=tag+'_'+str(spkrs)+'_'+str(pfo)+'_'+str(pflin)+'.txt'
    text_file = open(str12, "w")

    # Per-segment tables: [speaker id, start, end] in seconds and in frames.
    data_time=np.zeros((len(clus_final),3))
    data_frame=np.zeros((len(clus_final),3))
    
    for i in range(0,len(clus_final)):
        
        start=frms_start[i]*nsh
        end=frms_end[i]*nsh
        dur=end-start
        
        spkid=flag_pt[i]
        data_time[i,0]=spkid
        data_time[i,1]=start
        data_time[i,2]=end-nsh

        data_frame[i,0]=spkid
        data_frame[i,1]=frms_start[i]
        data_frame[i,2]=frms_end[i]-1
        if(filetype=='rttm'):
            str1='SPEAKER '+ tag+' 1 '+str(start)+' '+str(dur)+' <NA> <NA> '+str(spkid)+' <NA> <NA>'
            text_file.write(str1+"\n")
##        else:
##            str1='SP'+str(spkid)+' '+str(frms_start[i])+' '+str(frms_end[i])
##            text_file.write(str1+"\n")

    
    #print(str(len(feat[1,:]))+'   feat_1 '+str(len(feat_1[1,:])))
    # Build a per-frame label list, prefixing 'SIL' for frames before the
    # first speech segment.
    data_perfrm=[]
    if(data_frame[0,1]!=0):
        # NOTE(review): data_frame[0,1] is a numpy float; range() over a
        # float raises TypeError in Python 3 -- confirm this branch is
        # actually reachable / whether int() was intended.
        for j in range(0,data_frame[0,1]):
                      data_perfrm.append('SIL')
          
        


    # Rebuild the segment tables with explicit 'SIL' rows inserted for the
    # gaps between consecutive speech segments.
    datafrm_sil=[]
    datatim_sil=[]
    for i in range(0,len(data_frame[:,1])):

          size=data_frame[i,2]-data_frame[i,1]+1
          datafrm_sil.append(data_frame[i])
          datatim_sil.append(data_time[i])

          for k in range(0,int(size)):
              data_perfrm.append(data_frame[i,0])
              

          if(i<len(data_frame[:,1])-1):
              
              size2=data_frame[i+1,1]-data_frame[i,2]
              
              if(size2>1):
                  tmp=np.array(['SIL',data_frame[i,2]+1,data_frame[i+1,1]-1])
                  datafrm_sil.append(tmp)    
                  tmp2=np.array(['SIL',(data_frame[i,2]+1)*nsh,(data_frame[i+1,1]-1)*nsh])
                  datatim_sil.append(tmp2)
                  for j in range(0,int(size2)-1):
                      data_perfrm.append('SIL')
          
                  
              
              
        
    datafrm_sil=np.array(datafrm_sil)
    datatim_sil=np.array(datatim_sil)

    # Non-RTTM output: one line per segment, 'SP<id> start end' or 'SIL ...'.
    for n in range(0,len(datafrm_sil)):
        if(filetype!='rttm'):
            if(datafrm_sil[n,0]!='SIL'):
                str1='SP'+str(int(float(datafrm_sil[n,0])))+' '+str((datafrm_sil[n,1]))+' '+str((datafrm_sil[n,2]))
                text_file.write(str1+"\n")
            else:
                str1=str((datafrm_sil[n,0]))+' '+str((datafrm_sil[n,1]))+' '+str((datafrm_sil[n,2]))
                text_file.write(str1+"\n")
                
            
        
    text_file.close()
    return data_time,data_frame,data_perfrm,datafrm_sil,datatim_sil