Beispiel #1
0
def train(filelist,out='./out_dir/',model='UBM_0'):
    """Prepare features for every listed recording and run ``sgminit`` on them.

    For each audio path in ``filelist`` this runs speech activity detection
    and MFCC extraction, appends the resulting "<mfcc> <sad>" pair to a list
    file, and finally invokes the external ``sgminit`` executable on that
    list file.

    Args:
        filelist: Path to a text file holding one audio file path per line.
        out: Output directory prefix used for every generated file.
        model: File name used for both the list file and the model output.

    Returns:
        The model path that was passed to ``sgminit`` as its last argument.
    """
    # NOTE(review): with the default ``out`` (trailing slash) this list file
    # and ``ubmname`` below differ only by a doubled slash, i.e. they name the
    # same file -- confirm that sgminit overwriting its own list is intended.
    ubm_list = out+model

    # Context managers guarantee both handles are closed even when SAD or
    # MFCC extraction raises part-way through the list.
    with open(filelist) as fin, open(ubm_list,'w') as fout:
        for line in fin:
            wavname = line.strip()
            if not wavname:
                # A blank (e.g. trailing) line is not an audio file.
                continue
            basename = tools.gen_uid(wavname)
            featname = '%s/%s_feat.mfc'%(out,basename)
            sadname = '%s/%s_sad.txt'%(out,basename)
            attr = {'audio':wavname,'mfcc':featname,'sad':sadname}

            # SAD
            sad.run_sad(attr)

            # MFCC
            feat.run_mfcc(attr)

            fout.write(attr['mfcc']+' '+attr['sad']+'\n')

    path = tools.set_path()
    ubmname = '%s/%s'%(out,model)
    # NOTE(review): the paths are interpolated into a shell command without
    # quoting, so directories containing spaces will break this call.
    command = '%s/sgminit -q --label=speech --file-list=%s %s'
    exe_cmd = command%(path['audioseg'],ubm_list,ubmname)
    os.system(exe_cmd)
    return ubmname
Beispiel #2
0
def diarization(wavname,ubmname,out_dir):
    """Run the full diarization pipeline on a single audio file.

    The stages are executed in order: speech activity detection, MFCC
    extraction, BIC segmentation (``'audioseg'`` mode), clustering,
    adaptation of the given UBM to the two top clusters, and Viterbi
    resegmentation. All intermediate files are written under ``out_dir``
    and named after the unique id derived from ``wavname``.

    Args:
        wavname: Path of the audio file to process.
        ubmname: Path of the UBM that is adapted for each selected cluster.
        out_dir: Directory in which intermediate and result files are placed.

    Returns:
        The three values produced by ``tools.merge_segs`` for the Viterbi
        and SAD files (labels, segment starts, segment ends).
    """
    out = out_dir+'/'
    basename = tools.gen_uid(wavname)
    sadname = '%s/%s_sad.txt'%(out,basename)
    featname = '%s/%s_feat.mfc'%(out,basename)
    bicname = '%s/%s_bic.txt'%(out,basename)
    clustname = '%s/%s_cluster.txt'%(out,basename)
    viterbiname = '%s/%s_viterbi.txt'%(out,basename)
    attr = {'audio':wavname,
        'mfcc':featname,
        'sad':sadname,
        'bic':bicname,
        'cluster':clustname,
        'viterbi':viterbiname}

    # SAD
    sad.run_sad(attr)

    # MFCC
    feat.run_mfcc(attr)

    # BIC
    bic.run_bic(attr,'audioseg')

    # CLUSTERING
    cluster.run_clustering(attr)

    # Pick top clusters
    labels, segment_starts,segment_ends = tools.read_segs(attr['cluster'])
    top_n = tools.top_n_clusters(labels, segment_starts,segment_ends,n=2)

    # Adapt UBM for each cluster.
    # The loop variable must not be called ``cluster``: assigning to that
    # name anywhere in this function would make it a local and turn the
    # ``cluster.run_clustering`` call above into an UnboundLocalError.
    cluster_gmms = {}
    for i in top_n:
        cluster_label = 'C%s'%(str(i))
        gmmname = gmm.adapt(attr,cluster_label,ubmname)
        cluster_gmms[cluster_label] = gmmname

    # Resegmentation
    hmmname = '%s/%s_hmm.txt'%(out,basename)
    resegment.viterbi(attr,cluster_gmms,hmmname)
    labs,starts,ends = tools.merge_segs(attr['viterbi'],attr['sad'])
    return labs,starts,ends
Beispiel #3
0
    path = tools.set_path()
    sadname = attr['sad']
    featname = attr['mfcc']
    bicname = attr['bic']

    if mode == 'audioseg':
        command = '%s/sbic --segmentation=%s --label=speech %s %s'
        exe_cmd = command % (path['audioseg'], sadname, featname, bicname)
        os.system(exe_cmd)
        return
    elif mode == 'uniform':
        # For cases where we'd rather break the signal
        # into equal length segments.
        segment_lengths = 2. * 16000.  # in samples
        unifrom_segmentation(bicname, sadname, segment_lengths)
        return
    else:
        raise ('Segmentation mode not available!')


if __name__ == '__main__':
    # Manual run: SAD and MFCC extraction followed by uniform segmentation
    # of one hard-coded recording, with all outputs written to the current
    # working directory.
    wavname = '/Users/navidshokouhi/Downloads/unimaquarie/projects/dolby-annotations/data/1_222_2_7_001-ch6-speaker16.wav'
    uid = tools.gen_uid(wavname)
    attr = {
        'audio': wavname,
        'sad': './%s_sad.txt' % (uid),
        'mfcc': './%s.mfc' % (uid),
        'bic': './%s_bic.txt' % (uid),
    }
    for stage in (sad.run_sad, feat.run_mfcc):
        stage(attr)
    run_bic(attr, 'uniform')