def siplca_btchroma(btchroma, rank=4, win=60, plotiter=0, printiter=10,
                    niter=200, alphaZ=0):
    """
    Main method, takes a beat chroma matrix and segments it using
    Ron's SIPLCA (SEGMENTER.segment_song).
    INPUT
      btchroma   - beat chroma matrix, handed to the segmenter as is
      rank, win, plotiter, printiter, niter, alphaZ
                 - forwarded unchanged as keyword arguments
    RETURN
      labels, W, Z, H, segfun, norm
                 - the six values produced by SEGMENTER.segment_song;
                   labels holds one number per frame
    """
    # numpy's global random generator is reset to a constant seed before
    # every call (presumably so that runs are repeatable)
    np.random.seed(123)
    options = dict(rank=rank, win=win, plotiter=plotiter,
                   printiter=printiter, niter=niter, alphaZ=alphaZ)
    labels, W, Z, H, segfun, norm = SEGMENTER.segment_song(btchroma,
                                                           **options)
    return labels, W, Z, H, segfun, norm
def siplca_btchroma(btchroma, rank=4, win=60, plotiter=0, printiter=10,
                    niter=200, alphaZ=0):
    """
    Main method: run Ron's SIPLCA segmenter on a beat chroma matrix.
    INPUT
      btchroma   - beat chroma matrix, given to SEGMENTER.segment_song
      rank, win, plotiter, printiter, niter, alphaZ
                 - passed through to SEGMENTER.segment_song by keyword
    RETURN
      labels     - one number per frame
      W, Z, H, segfun, norm
                 - remaining outputs of SEGMENTER.segment_song, returned
                   untouched
    """
    # constant seed for numpy's global generator, set on every call
    # (presumably to make the segmentation reproducible)
    np.random.seed(123)
    segmentation = SEGMENTER.segment_song(
        btchroma,
        rank=rank,
        win=win,
        plotiter=plotiter,
        printiter=printiter,
        niter=niter,
        alphaZ=alphaZ)
    # unpack explicitly: the segmenter must hand back exactly six values
    labels, W, Z, H, segfun, norm = segmentation
    return labels, W, Z, H, segfun, norm
def siplca_method(wavfile,rank=4,win=60,plotiter=10,printiter=10,niter=200,fullspec=False): """ Compute beats using Dan's code Get the fingerprints Align landmarks with the beats Run SIPLCA method Measure errors """ # compute beats print 'compute beats' x,fs = mlab.wavread(wavfile,nout=2) x = np.average(x,axis=1) assert x.shape[0] > 2,'bad signal averaging' feats,beats = mlab.chrombeatftrs(x,fs,400,1,1,nout=2) # get the fingerprints print 'compute landmarks,',beats.shape[1],'beats found' L,S,T,maxes = LANDMARKS.find_landmarks_from_wav(wavfile) # LANDMARKS if not fullspec: # transform them into per beats features maxessecs = get_actual_times(maxes) print 'get features per beat,',len(maxessecs),'landmarks found' beatfeats = get_fingerprint_feats_per_beat(beats,np.max(maxessecs)+.1, maxes,maxessecs) databeat = np.zeros([256,len(beatfeats)]) for bf_idx in range(len(beatfeats)): bf = beatfeats[bf_idx] if bf.shape[1] == 0: continue for k in range(bf.shape[1]): databeat[int(bf[1,k]-1),bf_idx] += 1 print 'number of empty rows:',np.shape(np.where(databeat.sum(1)==0))[1],', removed...' 
databeat = databeat[np.where(databeat.sum(1)>0)[0],:] # FULL SPECTROGRAM else: # get time for each pos of the spectrogram, then beat for each pos fakemaxes = np.zeros([2,S.shape[1]]) fakemaxes[0,:] = np.array(range(S.shape[1])).reshape(1,S.shape[1]) times = get_actual_times(fakemaxes) # fill in databeat #beats = np.array(beats)[0] databeat = np.zeros([S.shape[0],beats.shape[1]]) for k in range(S.shape[1]): t = times[k] bs = np.where(np.array(beats)[0] > t)[0] if bs.shape[0] == 0: # last beat b = databeat.shape[1] - 1 else: b = max(0,bs[0]-1) databeat[:,b] += np.exp(S[:,k]) # remove the log for NMF databeat -= databeat.min() print 'full spec, max value:',databeat.max(),', shape =',databeat.shape # launch siplca, databeat += 1e-16 print 'launch siplca on',wavfile,', databeat.shape=',databeat.shape np.random.seed(123) V = databeat.copy() V/=V.sum() labels, W, Z, H, segfun, norm= SEGMENTER.segment_song(V, rank=rank,win=win, plotiter=plotiter, printiter=printiter, niter=niter) #res = SEGMENTER.convert_labels_to_segments(labels, beats[0]) # transform labels output to actuall startbeat and stopbeat startbeats = [0] stopbeats = [] currlabel = labels[0] for k in range(1,len(labels)): if labels[k] != currlabel: currlabel = labels[k] startbeats.append(k) stopbeats.append(k-1) stopbeats.append(len(labels)-1) # get groundtruth relwavfile = os.path.relpath(wavfile,start=_audio_dir) labfile = os.path.join(_seglab_dir,relwavfile[:-4]+'.lab') segstarts = [] fIn = open(labfile,'r') for line in fIn.readlines(): if line == '' or line.strip() == '': continue segstarts.append( float(line.strip().split('\t')[0]) ) fIn.close() refstartbeats = [] for ss in segstarts: # slow...! 
for k in range(beats.shape[1]-1): if beats[0,k] <= ss and beats[0,k+1] > ss: refstartbeats.append(k) break if ss > beats[0,-1]: refstartbeats.append(beats.shape[1]-1) elif ss < beats[0,0]: refstartbeats.append(0) refstartbeats = list(np.unique(refstartbeats)) refstopbeats = list(np.array(refstartbeats[1:]) - 1) + [beats.shape[1]-1] # measure error prec,rec,f,So,Su = MEASURES.prec_rec_f_So_Su(refstartbeats, refstopbeats, startbeats, stopbeats) print 'prec =',prec,', rec =',rec,', f =',f,', So =',So,', Su =',Su return prec,rec,f,So,Su
def siplca_method(wavfile, rank=4, win=60, plotiter=10, printiter=10, niter=200, fullspec=False): """ Compute beats using Dan's code Get the fingerprints Align landmarks with the beats Run SIPLCA method Measure errors """ # compute beats print 'compute beats' x, fs = mlab.wavread(wavfile, nout=2) x = np.average(x, axis=1) assert x.shape[0] > 2, 'bad signal averaging' feats, beats = mlab.chrombeatftrs(x, fs, 400, 1, 1, nout=2) # get the fingerprints print 'compute landmarks,', beats.shape[1], 'beats found' L, S, T, maxes = LANDMARKS.find_landmarks_from_wav(wavfile) # LANDMARKS if not fullspec: # transform them into per beats features maxessecs = get_actual_times(maxes) print 'get features per beat,', len(maxessecs), 'landmarks found' beatfeats = get_fingerprint_feats_per_beat(beats, np.max(maxessecs) + .1, maxes, maxessecs) databeat = np.zeros([256, len(beatfeats)]) for bf_idx in range(len(beatfeats)): bf = beatfeats[bf_idx] if bf.shape[1] == 0: continue for k in range(bf.shape[1]): databeat[int(bf[1, k] - 1), bf_idx] += 1 print 'number of empty rows:', np.shape( np.where(databeat.sum(1) == 0))[1], ', removed...' 
databeat = databeat[np.where(databeat.sum(1) > 0)[0], :] # FULL SPECTROGRAM else: # get time for each pos of the spectrogram, then beat for each pos fakemaxes = np.zeros([2, S.shape[1]]) fakemaxes[0, :] = np.array(range(S.shape[1])).reshape(1, S.shape[1]) times = get_actual_times(fakemaxes) # fill in databeat #beats = np.array(beats)[0] databeat = np.zeros([S.shape[0], beats.shape[1]]) for k in range(S.shape[1]): t = times[k] bs = np.where(np.array(beats)[0] > t)[0] if bs.shape[0] == 0: # last beat b = databeat.shape[1] - 1 else: b = max(0, bs[0] - 1) databeat[:, b] += np.exp(S[:, k]) # remove the log for NMF databeat -= databeat.min() print 'full spec, max value:', databeat.max( ), ', shape =', databeat.shape # launch siplca, databeat += 1e-16 print 'launch siplca on', wavfile, ', databeat.shape=', databeat.shape np.random.seed(123) V = databeat.copy() V /= V.sum() labels, W, Z, H, segfun, norm = SEGMENTER.segment_song(V, rank=rank, win=win, plotiter=plotiter, printiter=printiter, niter=niter) #res = SEGMENTER.convert_labels_to_segments(labels, beats[0]) # transform labels output to actuall startbeat and stopbeat startbeats = [0] stopbeats = [] currlabel = labels[0] for k in range(1, len(labels)): if labels[k] != currlabel: currlabel = labels[k] startbeats.append(k) stopbeats.append(k - 1) stopbeats.append(len(labels) - 1) # get groundtruth relwavfile = os.path.relpath(wavfile, start=_audio_dir) labfile = os.path.join(_seglab_dir, relwavfile[:-4] + '.lab') segstarts = [] fIn = open(labfile, 'r') for line in fIn.readlines(): if line == '' or line.strip() == '': continue segstarts.append(float(line.strip().split('\t')[0])) fIn.close() refstartbeats = [] for ss in segstarts: # slow...! 
for k in range(beats.shape[1] - 1): if beats[0, k] <= ss and beats[0, k + 1] > ss: refstartbeats.append(k) break if ss > beats[0, -1]: refstartbeats.append(beats.shape[1] - 1) elif ss < beats[0, 0]: refstartbeats.append(0) refstartbeats = list(np.unique(refstartbeats)) refstopbeats = list(np.array(refstartbeats[1:]) - 1) + [beats.shape[1] - 1] # measure error prec, rec, f, So, Su = MEASURES.prec_rec_f_So_Su(refstartbeats, refstopbeats, startbeats, stopbeats) print 'prec =', prec, ', rec =', rec, ', f =', f, ', So =', So, ', Su =', Su return prec, rec, f, So, Su