import os

import tools
import sad
import feat


def train(filelist, out='./out_dir/', model='UBM_0'):
    """Train a universal background model (UBM) on the wav files in filelist."""
    fin = open(filelist)
    # Keep the feature/SAD file list distinct from the UBM output path.
    ubm_list = out + model + '_filelist.txt'
    fout = open(ubm_list, 'w')
    for i in fin:
        wavname = i.strip()
        basename = tools.gen_uid(wavname)
        featname = '%s/%s_feat.mfc' % (out, basename)
        sadname = '%s/%s_sad.txt' % (out, basename)
        attr = {'audio': wavname, 'mfcc': featname, 'sad': sadname}
        # SAD
        sad.run_sad(attr)
        # MFCC
        feat.run_mfcc(attr)
        fout.write(attr['mfcc'] + ' ' + attr['sad'] + '\n')
    fin.close()
    fout.close()
    path = tools.set_path()
    ubmname = '%s/%s' % (out, model)
    # Initialize the UBM with audioseg's sgminit over all speech segments.
    command = '%s/sgminit -q --label=speech --file-list=%s %s'
    exe_cmd = command % (path['audioseg'], ubm_list, ubmname)
    os.system(exe_cmd)
    return ubmname
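# Example usage (a minimal sketch, not part of the original code): the wav-list
# path below is a placeholder, and tools.set_path() is assumed to point at a
# working audioseg install.
#
#     ubmname = train('ubm_wav_list.txt', out='./out_dir/', model='UBM_0')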
import tools
import sad
import feat
import bic
import cluster
import gmm
import resegment


def diarization(wavname, ubmname, out_dir):
    """Diarize a single wav file using a previously trained UBM."""
    out = out_dir + '/'
    basename = tools.gen_uid(wavname)
    sadname = '%s/%s_sad.txt' % (out, basename)
    featname = '%s/%s_feat.mfc' % (out, basename)
    bicname = '%s/%s_bic.txt' % (out, basename)
    clustname = '%s/%s_cluster.txt' % (out, basename)
    viterbiname = '%s/%s_viterbi.txt' % (out, basename)
    attr = {'audio': wavname,
            'mfcc': featname,
            'sad': sadname,
            'bic': bicname,
            'cluster': clustname,
            'viterbi': viterbiname}
    # SAD
    sad.run_sad(attr)
    # MFCC
    feat.run_mfcc(attr)
    # BIC segmentation
    bic.run_bic(attr, 'audioseg')
    # Clustering
    cluster.run_clustering(attr)
    # Pick the top clusters (n=2).
    labels, segment_starts, segment_ends = tools.read_segs(attr['cluster'])
    top_n = tools.top_n_clusters(labels, segment_starts, segment_ends, n=2)
    # Adapt the UBM for each top cluster.
    cluster_gmms = {}
    for i in top_n:
        cluster_label = 'C%s' % (str(i))
        gmmname = gmm.adapt(attr, cluster_label, ubmname)
        cluster_gmms[cluster_label] = gmmname
    # Resegmentation
    hmmname = '%s/%s_hmm.txt' % (out, basename)
    resegment.viterbi(attr, cluster_gmms, hmmname)
    labs, starts, ends = tools.merge_segs(attr['viterbi'], attr['sad'])
    return labs, starts, ends
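# Example usage (a minimal sketch, not part of the original code): 'meeting.wav'
# and './out_dir' are placeholders, and ubmname is assumed to come from a prior
# call to train().
#
#     labs, starts, ends = diarization('meeting.wav', ubmname, './out_dir')
#     for lab, start, end in zip(labs, starts, ends):
#         print('%s %.2f %.2f' % (lab, start, end))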
import os

import tools
import sad
import feat


def run_bic(attr, mode):
    """Segment the speech regions into speaker-homogeneous chunks."""
    path = tools.set_path()
    sadname = attr['sad']
    featname = attr['mfcc']
    bicname = attr['bic']
    if mode == 'audioseg':
        # BIC change-point detection with audioseg's sbic.
        command = '%s/sbic --segmentation=%s --label=speech %s %s'
        exe_cmd = command % (path['audioseg'], sadname, featname, bicname)
        os.system(exe_cmd)
        return
    elif mode == 'uniform':
        # For cases where we'd rather break the signal
        # into equal-length segments.
        segment_lengths = 2. * 16000.  # 2 seconds at 16 kHz, in samples
        unifrom_segmentation(bicname, sadname, segment_lengths)
        return
    else:
        raise ValueError('Segmentation mode not available!')


if __name__ == '__main__':
    wavname = '/Users/navidshokouhi/Downloads/unimaquarie/projects/dolby-annotations/data/1_222_2_7_001-ch6-speaker16.wav'
    basename = tools.gen_uid(wavname)
    sadname = './%s_sad.txt' % (basename)
    featname = './%s.mfc' % (basename)
    bicname = './%s_bic.txt' % (basename)
    attr = {'audio': wavname,
            'sad': sadname,
            'mfcc': featname,
            'bic': bicname}
    sad.run_sad(attr)
    feat.run_mfcc(attr)
    run_bic(attr, 'uniform')
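# The unifrom_segmentation helper called above is defined elsewhere in the
# project and is not shown here. The function below is only a rough,
# hypothetical sketch of what such a helper might do; the segment-file layout
# it reads and writes ('label start end' per line, times in seconds) is an
# illustrative guess, not the project's actual format.
def _uniform_segmentation_sketch(bicname, sadname, segment_length_samples,
                                 sample_rate=16000.):
    """Split each SAD speech region into fixed-length chunks and write them out."""
    chunk = segment_length_samples / sample_rate  # chunk duration in seconds
    with open(sadname) as fin, open(bicname, 'w') as fout:
        for line in fin:
            parts = line.split()
            if len(parts) < 3 or parts[0] != 'speech':
                continue
            start, end = float(parts[1]), float(parts[2])
            t = start
            while t < end:
                fout.write('speech %f %f\n' % (t, min(t + chunk, end)))
                t += chunk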