def siplca_method(wavfile, rank=4, win=60, plotiter=10, printiter=10,
                  niter=200, fullspec=False):
    """
    Segment a song with SIPLCA on per-beat features and score the result.

    Compute beats using Dan's code
    Get the fingerprints
    Align landmarks with the beats (or, if fullspec, sum the spectrogram
    frames falling in each beat)
    Run SIPLCA method
    Measure errors against the ground-truth .lab file

    INPUT
       wavfile    - path to the audio file; the ground truth is looked up
                    in _seglab_dir at the same path relative to _audio_dir,
                    with the extension replaced by '.lab'
       rank, win, plotiter, printiter, niter
                  - passed unchanged to SEGMENTER.segment_song
       fullspec   - if True, use the spectrogram instead of landmark counts
    RETURN
       prec, rec, f, So, Su as returned by MEASURES.prec_rec_f_So_Su
    """
    # compute beats
    print('compute beats')
    x, fs = mlab.wavread(wavfile, nout=2)
    x = np.average(x, axis=1)
    assert x.shape[0] > 2, 'bad signal averaging'
    feats, beats = mlab.chrombeatftrs(x, fs, 400, 1, 1, nout=2)
    nbeats = beats.shape[1]
    # beat times as a flat array, built once instead of once per frame
    beat_times = np.array(beats)[0]
    # get the fingerprints
    print('compute landmarks, %s beats found' % nbeats)
    L, S, T, maxes = LANDMARKS.find_landmarks_from_wav(wavfile)
    # LANDMARKS
    if not fullspec:
        # transform them into per beats features
        maxessecs = get_actual_times(maxes)
        print('get features per beat, %s landmarks found' % len(maxessecs))
        beatfeats = get_fingerprint_feats_per_beat(beats,
                                                   np.max(maxessecs) + .1,
                                                   maxes, maxessecs)
        # count, for each beat, how many landmarks fall on each row
        databeat = np.zeros([256, len(beatfeats)])
        for bf_idx, bf in enumerate(beatfeats):
            for k in range(bf.shape[1]):
                databeat[int(bf[1, k] - 1), bf_idx] += 1
        row_sums = databeat.sum(1)
        print('number of empty rows: %s , removed...'
              % (row_sums == 0).sum())
        databeat = databeat[np.where(row_sums > 0)[0], :]
    # FULL SPECTROGRAM
    else:
        # get time for each pos of the spectrogram, then beat for each pos
        nframes = S.shape[1]
        fakemaxes = np.zeros([2, nframes])
        fakemaxes[0, :] = np.arange(nframes)
        times = get_actual_times(fakemaxes)
        # fill in databeat
        databeat = np.zeros([S.shape[0], nbeats])
        for k in range(nframes):
            later = np.where(beat_times > times[k])[0]
            if later.shape[0] == 0:
                # frame is after the last beat
                b = nbeats - 1
            else:
                b = max(0, later[0] - 1)
            databeat[:, b] += np.exp(S[:, k])  # remove the log for NMF
        databeat -= databeat.min()
        print('full spec, max value: %s , shape = %s'
              % (databeat.max(), databeat.shape))
    # launch siplca, the small offset keeps every entry strictly positive
    databeat += 1e-16
    print('launch siplca on %s , databeat.shape= %s'
          % (wavfile, databeat.shape))
    np.random.seed(123)
    V = databeat.copy()
    V /= V.sum()
    labels, W, Z, H, segfun, norm = SEGMENTER.segment_song(
        V, rank=rank, win=win, plotiter=plotiter, printiter=printiter,
        niter=niter)
    # transform labels output to actual startbeat and stopbeat:
    # a new segment starts wherever the label changes
    startbeats = [0]
    stopbeats = []
    for k in range(1, len(labels)):
        if labels[k] != labels[k - 1]:
            startbeats.append(k)
            stopbeats.append(k - 1)
    stopbeats.append(len(labels) - 1)
    # get groundtruth
    relwavfile = os.path.relpath(wavfile, start=_audio_dir)
    # splitext instead of [:-4] so extensions of any length are handled
    labfile = os.path.join(_seglab_dir,
                           os.path.splitext(relwavfile)[0] + '.lab')
    segstarts = []
    # 'with' closes the file even if a line cannot be parsed
    with open(labfile, 'r') as fIn:
        for line in fIn:
            line = line.strip()
            if not line:
                continue
            segstarts.append(float(line.split('\t')[0]))
    # map each segment start time to the last beat at or before it
    # (assumes beat times are increasing). Starts before the first beat go
    # to beat 0; starts at or after the last beat go to the last beat, so a
    # start exactly on the last beat is no longer dropped.
    refstartbeats = np.searchsorted(beat_times, segstarts, side='right') - 1
    refstartbeats = np.maximum(refstartbeats, 0)
    refstartbeats = list(np.unique(refstartbeats))
    refstopbeats = list(np.array(refstartbeats[1:]) - 1) + [nbeats - 1]
    # measure error
    prec, rec, f, So, Su = MEASURES.prec_rec_f_So_Su(refstartbeats,
                                                     refstopbeats,
                                                     startbeats, stopbeats)
    print('prec = %s , rec = %s , f = %s , So = %s , Su = %s'
          % (prec, rec, f, So, Su))
    return prec, rec, f, So, Su
def typical_splits(basedir):
    """
    Baseline: segment every song with the typical segment proportions.

    Read all lab files.
    Find the most common number of segments.
    For that number, find the average proportion of each segment.
    Apply it to every song, score it against the song's own lab file
    and print the scores averaged over all files.

    INPUT
       basedir  - directory passed to get_all_files to find '*.lab' files
    RETURN
       nothing, results are only printed
    """
    # get all lab files, and read each of them only once
    labfiles = get_all_files(basedir, pattern='*.lab')
    nfiles = len(labfiles)
    if nfiles == 0:
        # nothing to average over; avoids the division by zero at the end
        print('no lab files found in %s' % basedir)
        return
    segmentations = [read_lab_file(f) for f in labfiles]
    # find the typical number of segments; bincount grows with the data
    # and argmax keeps the smallest count in case of a tie
    nsegs = [len(startbs) for startbs, stopbs, labels in segmentations]
    common_nsegs = int(np.argmax(np.bincount(nsegs)))
    print('most common number of segments: %s' % common_nsegs)
    # for that number, find typical proportion
    proportions = np.zeros(common_nsegs)
    count_common_segmentation = 0
    for startbs, stopbs, labels in segmentations:
        if len(startbs) != common_nsegs:
            continue
        count_common_segmentation += 1
        nbeats = stopbs[-1] + 1
        # segment lengths in beats, as a fraction of the song length
        proportions += np.diff(startbs + [nbeats]) * 1. / nbeats
    proportions *= 1. / count_common_segmentation
    # our typical proportions are:
    print('typical proportions: %s' % (proportions,))
    # test on all lab files
    scores = []
    for startbs, stopbs, labels in segmentations:
        # get ideal fake segmentation
        nbeats = stopbs[-1] + 1
        nbeats_per_seg = nbeats * proportions
        # adjust so it fits the number of beats
        while np.sum(nbeats_per_seg) > nbeats:
            nbeats_per_seg[np.argmax(nbeats_per_seg)] -= 1
        while np.sum(nbeats_per_seg) < nbeats:
            nbeats_per_seg[np.argmax(nbeats_per_seg)] += 1
        # create corresponding startbs and stopbs: each segment starts
        # where the previous ones end, the first one at beat 0
        startbscand = list(np.concatenate(([0.],
                                           np.cumsum(nbeats_per_seg[:-1]))))
        stopbscand = startbscand[1:] + [nbeats - 1]
        # measure
        scores.append(MEASURES.prec_rec_f_So_Su(startbs, stopbs,
                                                startbscand, stopbscand))
    # done, average each measure over the files
    allprec, allrec, allf, allSo, allSu = [sum(vals) * (1. / nfiles)
                                           for vals in zip(*scores)]
    # print
    print('average prec = %s , rec = %s , f = %s , So = %s , Su = %s'
          % (allprec, allrec, allf, allSo, allSu))
cuts = get_cuts(dl,dicts) for c in cuts: count_cuts[c] += 1 print 'for longest dict',k,', number of cuts =',len(cuts) # check possible remaining cuts if k == longest_p_len: possible_cuts = set(cuts) else: possible_cuts = possible_cuts.intersection(set(cuts)) print 'number of remaining cuts:',len(possible_cuts) # get measures on segmentation if labfile != '': startbref, stopbref, labels = read_lab_file(labfile) startbcand = np.unique([0] + map(lambda x:x-1,possible_cuts)) stopbcand = np.unique(list(possible_cuts) + [btchroma.shape[1]-1]) prec,rec,fval,So,Su = MEASURES.prec_rec_f_So_Su(startbref,stopbref, startbcand,stopbcand) print 'prec =',prec,', rec =',rec,', fval =',fval,', So =',So,', Su =',Su # iteration done, print if len(possible_cuts) > 0: print 'remaining possible cuts:',possible_cuts # print import pylab as P pparams = {'interpolation':'nearest','origin':'lower','cmap':P.cm.gray_r,'aspect':'auto'} P.subplot(2,1,1) P.imshow(btchroma,**pparams) P.subplot(2,1,2) P.imshow(btchroma_encoded,**pparams) # add lines #for c in possible_cuts: # P.axvline(x=c-.5,ymin=0,ymax=1,color='r')
def typical_splits(basedir):
    """
    Baseline: segment every song with the typical segment proportions.

    Read all lab files.
    Find the most common number of segments.
    For that number, find the average proportion of each segment.
    Apply it to every song, score it against the song's own lab file
    and print the scores averaged over all files.

    INPUT
       basedir  - directory passed to get_all_files to find '*.lab' files
    RETURN
       nothing, results are only printed
    """
    # get all lab files, and read each of them only once
    labfiles = get_all_files(basedir, pattern="*.lab")
    nfiles = len(labfiles)
    if nfiles == 0:
        # nothing to average over; avoids the division by zero at the end
        print("no lab files found in %s" % basedir)
        return
    segmentations = [read_lab_file(f) for f in labfiles]
    # find the typical number of segments; bincount grows with the data
    # and argmax keeps the smallest count in case of a tie
    nsegs = [len(startbs) for startbs, stopbs, labels in segmentations]
    common_nsegs = int(np.argmax(np.bincount(nsegs)))
    print("most common number of segments: %s" % common_nsegs)
    # for that number, find typical proportion
    proportions = np.zeros(common_nsegs)
    count_common_segmentation = 0
    for startbs, stopbs, labels in segmentations:
        if len(startbs) != common_nsegs:
            continue
        count_common_segmentation += 1
        nbeats = stopbs[-1] + 1
        # segment lengths in beats, as a fraction of the song length
        proportions += np.diff(startbs + [nbeats]) * 1.0 / nbeats
    proportions *= 1.0 / count_common_segmentation
    # our typical proportions are:
    print("typical proportions: %s" % (proportions,))
    # test on all lab files
    scores = []
    for startbs, stopbs, labels in segmentations:
        # get ideal fake segmentation
        nbeats = stopbs[-1] + 1
        nbeats_per_seg = nbeats * proportions
        # adjust so it fits the number of beats
        while np.sum(nbeats_per_seg) > nbeats:
            nbeats_per_seg[np.argmax(nbeats_per_seg)] -= 1
        while np.sum(nbeats_per_seg) < nbeats:
            nbeats_per_seg[np.argmax(nbeats_per_seg)] += 1
        # create corresponding startbs and stopbs: each segment starts
        # where the previous ones end, the first one at beat 0
        startbscand = list(np.concatenate(([0.0],
                                           np.cumsum(nbeats_per_seg[:-1]))))
        stopbscand = startbscand[1:] + [nbeats - 1]
        # measure
        scores.append(MEASURES.prec_rec_f_So_Su(startbs, stopbs,
                                                startbscand, stopbscand))
    # done, average each measure over the files
    allprec, allrec, allf, allSo, allSu = [sum(vals) * (1.0 / nfiles)
                                           for vals in zip(*scores)]
    # print
    print("average prec = %s , rec = %s , f = %s , So = %s , Su = %s"
          % (allprec, allrec, allf, allSo, allSu))
for c in cuts: count_cuts[c] += 1 print 'for longest dict', k, ', number of cuts =', len(cuts) # check possible remaining cuts if k == longest_p_len: possible_cuts = set(cuts) else: possible_cuts = possible_cuts.intersection(set(cuts)) print 'number of remaining cuts:', len(possible_cuts) # get measures on segmentation if labfile != '': startbref, stopbref, labels = read_lab_file(labfile) startbcand = np.unique([0] + map(lambda x: x - 1, possible_cuts)) stopbcand = np.unique( list(possible_cuts) + [btchroma.shape[1] - 1]) prec, rec, fval, So, Su = MEASURES.prec_rec_f_So_Su( startbref, stopbref, startbcand, stopbcand) print 'prec =', prec, ', rec =', rec, ', fval =', fval, ', So =', So, ', Su =', Su # iteration done, print if len(possible_cuts) > 0: print 'remaining possible cuts:', possible_cuts # print import pylab as P pparams = { 'interpolation': 'nearest', 'origin': 'lower', 'cmap': P.cm.gray_r, 'aspect': 'auto' } P.subplot(2, 1, 1) P.imshow(btchroma, **pparams)
def siplca_method(wavfile, rank=4, win=60, plotiter=10, printiter=10,
                  niter=200, fullspec=False):
    """
    Segment a song with SIPLCA on per-beat features and score the result.

    Compute beats using Dan's code
    Get the fingerprints
    Align landmarks with the beats (or, if fullspec, sum the spectrogram
    frames falling in each beat)
    Run SIPLCA method
    Measure errors against the ground-truth .lab file

    INPUT
       wavfile    - path to the audio file; the ground truth is looked up
                    in _seglab_dir at the same path relative to _audio_dir,
                    with the extension replaced by '.lab'
       rank, win, plotiter, printiter, niter
                  - passed unchanged to SEGMENTER.segment_song
       fullspec   - if True, use the spectrogram instead of landmark counts
    RETURN
       prec, rec, f, So, Su as returned by MEASURES.prec_rec_f_So_Su
    """
    # compute beats
    print('compute beats')
    x, fs = mlab.wavread(wavfile, nout=2)
    x = np.average(x, axis=1)
    assert x.shape[0] > 2, 'bad signal averaging'
    feats, beats = mlab.chrombeatftrs(x, fs, 400, 1, 1, nout=2)
    nbeats = beats.shape[1]
    # beat times as a flat array, built once instead of once per frame
    beat_times = np.array(beats)[0]
    # get the fingerprints
    print('compute landmarks, %s beats found' % nbeats)
    L, S, T, maxes = LANDMARKS.find_landmarks_from_wav(wavfile)
    # LANDMARKS
    if not fullspec:
        # transform them into per beats features
        maxessecs = get_actual_times(maxes)
        print('get features per beat, %s landmarks found' % len(maxessecs))
        beatfeats = get_fingerprint_feats_per_beat(beats,
                                                   np.max(maxessecs) + .1,
                                                   maxes, maxessecs)
        # count, for each beat, how many landmarks fall on each row
        databeat = np.zeros([256, len(beatfeats)])
        for bf_idx, bf in enumerate(beatfeats):
            for k in range(bf.shape[1]):
                databeat[int(bf[1, k] - 1), bf_idx] += 1
        row_sums = databeat.sum(1)
        print('number of empty rows: %s , removed...'
              % (row_sums == 0).sum())
        databeat = databeat[np.where(row_sums > 0)[0], :]
    # FULL SPECTROGRAM
    else:
        # get time for each pos of the spectrogram, then beat for each pos
        nframes = S.shape[1]
        fakemaxes = np.zeros([2, nframes])
        fakemaxes[0, :] = np.arange(nframes)
        times = get_actual_times(fakemaxes)
        # fill in databeat
        databeat = np.zeros([S.shape[0], nbeats])
        for k in range(nframes):
            later = np.where(beat_times > times[k])[0]
            if later.shape[0] == 0:
                # frame is after the last beat
                b = nbeats - 1
            else:
                b = max(0, later[0] - 1)
            databeat[:, b] += np.exp(S[:, k])  # remove the log for NMF
        databeat -= databeat.min()
        print('full spec, max value: %s , shape = %s'
              % (databeat.max(), databeat.shape))
    # launch siplca, the small offset keeps every entry strictly positive
    databeat += 1e-16
    print('launch siplca on %s , databeat.shape= %s'
          % (wavfile, databeat.shape))
    np.random.seed(123)
    V = databeat.copy()
    V /= V.sum()
    labels, W, Z, H, segfun, norm = SEGMENTER.segment_song(
        V, rank=rank, win=win, plotiter=plotiter, printiter=printiter,
        niter=niter)
    # transform labels output to actual startbeat and stopbeat:
    # a new segment starts wherever the label changes
    startbeats = [0]
    stopbeats = []
    for k in range(1, len(labels)):
        if labels[k] != labels[k - 1]:
            startbeats.append(k)
            stopbeats.append(k - 1)
    stopbeats.append(len(labels) - 1)
    # get groundtruth
    relwavfile = os.path.relpath(wavfile, start=_audio_dir)
    # splitext instead of [:-4] so extensions of any length are handled
    labfile = os.path.join(_seglab_dir,
                           os.path.splitext(relwavfile)[0] + '.lab')
    segstarts = []
    # 'with' closes the file even if a line cannot be parsed
    with open(labfile, 'r') as fIn:
        for line in fIn:
            line = line.strip()
            if not line:
                continue
            segstarts.append(float(line.split('\t')[0]))
    # map each segment start time to the last beat at or before it
    # (assumes beat times are increasing). Starts before the first beat go
    # to beat 0; starts at or after the last beat go to the last beat, so a
    # start exactly on the last beat is no longer dropped.
    refstartbeats = np.searchsorted(beat_times, segstarts, side='right') - 1
    refstartbeats = np.maximum(refstartbeats, 0)
    refstartbeats = list(np.unique(refstartbeats))
    refstopbeats = list(np.array(refstartbeats[1:]) - 1) + [nbeats - 1]
    # measure error
    prec, rec, f, So, Su = MEASURES.prec_rec_f_So_Su(refstartbeats,
                                                     refstopbeats,
                                                     startbeats, stopbeats)
    print('prec = %s , rec = %s , f = %s , So = %s , Su = %s'
          % (prec, rec, f, So, Su))
    return prec, rec, f, So, Su