def freqs_my_songs(filenames, codebook, pSize=8, keyInv=True,
                   downBeatInv=False, bars=2, normalize=False):
    """
    Returns a list of numpy.array containing the frequency of each
    code in the codebook, for each file in filenames.
    """
    import numpy as np
    import VQutils as VQU
    res = []
    nCodes = codebook.shape[0]
    for f in filenames:
        # encode song
        best_code_per_p, featsNorm, encoding, featsNormMAT, encodingMAT = \
            encode_one_song(f, codebook, pSize=pSize, keyInv=keyInv,
                            downBeatInv=downBeatInv, bars=bars)
        # count occurrences of each code
        freqs = np.zeros([1, nCodes])
        for code in best_code_per_p:
            freqs[0, int(code)] += 1
        if normalize and len(best_code_per_p) > 0:
            freqs *= 1. / VQU.euclidean_norm(freqs)
        res.append(freqs)
    # done, return res
    return res

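# A minimal usage sketch for freqs_my_songs; the file list and the trained
# codebook are assumptions (whatever was produced upstream), not data that
# ships with this module. Songs that could not be encoded keep an all-zero
# frequency vector, so we filter on that before stacking everything into
# one (nSongs, nCodes) matrix.
def demo_freqs_matrix(filenames, codebook):
    """ Stack per-song code frequencies into one matrix """
    import numpy as np
    freqs = freqs_my_songs(filenames, codebook, normalize=False)
    valid = [f for f in freqs if np.abs(f).sum() > 0]  # drop empty songs
    if len(valid) == 0:
        return np.zeros([0, codebook.shape[0]])
    return np.concatenate(valid, axis=0)
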
def merge_codebook(codebook, nGoal, freqs=None):
    """
    Merge the codebook in an iterative and greedy way.
    Algo:
      - find the closest pair of codes
      - merge them, using freqs if available
      - repeat until the desired number of codes (nGoal)
    Returns the smaller codebook, #codes=nGoal
    Also returns the frequencies of the new codebook.
    Code not optimized!!!!!! close to n^3 operations
    """
    import numpy as np
    import VQutils as VQU
    import copy
    # set freqs, sanity checks
    if freqs is None or len(freqs) == 0:
        freqs = np.ones(codebook.shape[0])
    freqs = np.array(freqs)
    assert freqs.size == codebook.shape[0]
    assert nGoal < codebook.shape[0]
    assert nGoal > 0
    # let's go!
    cb = copy.deepcopy(codebook)
    for k in range(codebook.shape[0] - nGoal):
        # compute dists for all pairs
        dists = np.zeros([cb.shape[0], cb.shape[0]])
        for l in range(dists.shape[0]):
            dists[l, l] = np.inf
            for c in range(l + 1, dists.shape[1]):
                dists[l, c] = VQU.euclidean_dist(cb[l], cb[c])
                dists[c, l] = np.inf
        # find closest pair
        pos = np.where(dists == dists.min())
        code1 = pos[0][0]
        code2 = pos[1][0]
        print 'iter', k, ' min distance=', dists.min(), ' codes=', code1, ',', code2
        assert code1 < code2  # code1 should be smaller from how we filled dists
        # merge (freqs-weighted version kept for reference)
        #cb[code1,:] = np.mean([cb[code1,:]*freqs[code1],cb[code2,:]*freqs[code2]],axis=0) * 1. / (freqs[code1] + freqs[code2])
        cb[code1, :] = np.mean([cb[code1, :], cb[code2, :]], axis=0)
        freqs[code1] += freqs[code2]
        # remove code2 by swapping in the last code, then truncating
        if code2 + 1 < cb.shape[0]:
            cb[code2, :] = cb[-1, :]
            freqs[code2] = freqs[-1]
        cb = cb[:-1]
        freqs = freqs[:-1]
    # done
    return cb, freqs

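# The merge loop above recomputes all pairwise distances at every iteration,
# hence the ~n^3 cost noted in the docstring. A sketch of a cheaper
# closest-pair search using scipy's pdist, assuming VQU.euclidean_dist is the
# plain L2 distance (if it is squared L2, the argmin picks the same pair
# anyway, since squaring is monotonic on nonnegative values).
def closest_code_pair(cb):
    """ Return (code1, code2), code1 < code2, the closest pair of codes """
    import numpy as np
    import scipy.spatial.distance as DIST
    condensed = DIST.pdist(cb)  # all pairwise L2 distances, condensed form
    # triu_indices enumerates pairs in the same order as pdist
    l, c = np.triu_indices(cb.shape[0], k=1)
    best = np.argmin(condensed)
    return int(l[best]), int(c[best])
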
def load_and_encode_data(codebook, pSize=4, keyInv=True,
                         downBeatInv=False, bars=1, partialbar=1, offset=0):
    """
    Load a dataset, and encode it with the codebook.
    Return dists, avg_dists
    """
    assert codebook.shape[1] == pSize * 12
    import VQutils
    # get data
    featsNorm = get_data_maxener(pSize=pSize, keyInv=keyInv,
                                 downBeatInv=downBeatInv, bars=bars,
                                 partialbar=partialbar, offset=offset)
    # encode
    best_code_per_p, dists, avg_dists = VQutils.find_best_code_per_pattern(
        featsNorm, codebook, scale=False)
    return dists, avg_dists

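# Small sketch of how the two return values are typically summarized into a
# single distortion number (this mirrors the report in do_experiment at the
# end of this file); the codebook is assumed to be trained already.
def demo_test_distortion(codebook, pSize=4, bars=1):
    """ Encode the test data, return the mean of the average distances """
    import numpy as np
    dists, avg_dists = load_and_encode_data(codebook, pSize=pSize, bars=bars)
    return np.average(avg_dists)
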
def encode_one_song(filename, codebook, pSize=8, keyInv=True,
                    downBeatInv=False, bars=2):
    """
    returns: best_code_per_p, song, encoding, song as MAT, encoding as MAT
    matrices are 'derolled'
    """
    import feats_utils as FU
    import numpy as np
    import data_iterator
    import VQutils
    # create data iterator
    data_iter = data_iterator.DataIterator()
    data_iter.setMatfiles([filename])    # set matfiles
    if bars > 0:
        data_iter.useBars(bars)          # a pattern spans 'bars' bars
    else:
        data_iter.useBars(0)             # important to set it to zero!
        data_iter.setFeatsize(pSize)     # a pattern is a num. of beats
    data_iter.stopAfterOnePass(True)
    # load data
    featsNorm = [FU.normalize_pattern_maxenergy(p, pSize, keyInv,
                                                downBeatInv, retRoll=True)
                 for p in data_iter]
    keyroll = np.array([x[1] for x in featsNorm])
    dbroll = np.array([x[2] for x in featsNorm])
    featsNorm = [x[0].flatten() for x in featsNorm]
    if len(featsNorm) == 0:  # empty song
        return [], [], [], [], []
    featsNorm = np.array(featsNorm)
    # remove all-zero patterns
    res = [np.sum(r) > 0 for r in featsNorm]
    res2 = np.where(res)
    featsNorm = featsNorm[res2]
    keyroll = keyroll[res2]
    dbroll = dbroll[res2]
    assert dbroll.shape[0] == keyroll.shape[0]
    assert dbroll.shape[0] == featsNorm.shape[0]
    # find code per pattern
    best_code_per_p, dists, avg_dists = VQutils.find_best_code_per_pattern(
        featsNorm, codebook)
    best_code_per_p = np.asarray([int(x) for x in best_code_per_p])
    assert best_code_per_p.shape[0] > 0, 'empty song, we should have caught that'
    encoding = codebook[best_code_per_p]
    # transform into 2 matrices, with derolling!!!!!!!!!
    assert featsNorm.shape[0] == encoding.shape[0]
    featsNormMAT = np.concatenate(
        [np.roll(np.roll(featsNorm[x].reshape(12, pSize),
                         -keyroll[x], axis=0), -dbroll[x], axis=1)
         for x in range(featsNorm.shape[0])], axis=1)
    encodingMAT = np.concatenate(
        [np.roll(np.roll(encoding[x].reshape(12, pSize),
                         -keyroll[x], axis=0), -dbroll[x], axis=1)
         for x in range(featsNorm.shape[0])], axis=1)
    # return
    return best_code_per_p, featsNorm, encoding, featsNormMAT, encodingMAT

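# Sketch of a side-by-side visualization of encode_one_song's output,
# assuming pylab is available (as elsewhere in this file): the 'derolled'
# song matrix on top, its codebook encoding below.
def demo_plot_encoding(filename, codebook, pSize=8, bars=2):
    """ Plot a song (top) against its VQ encoding (bottom) """
    import pylab as P
    res = encode_one_song(filename, codebook, pSize=pSize, bars=bars)
    best_code_per_p, featsNorm, encoding, featsNormMAT, encodingMAT = res
    if len(best_code_per_p) == 0:
        print 'empty song:', filename
        return
    P.subplot(211)
    P.imshow(featsNormMAT, interpolation='nearest', aspect='auto',
             origin='lower')
    P.title('song')
    P.subplot(212)
    P.imshow(encodingMAT, interpolation='nearest', aspect='auto',
             origin='lower')
    P.title('encoding')
    P.show()
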
def LLE_my_codebook(codebook, nNeighbors=5, nRand=5):
    """
    Performs LLE on the codebook and displays the result.
    LLE code not mine, see code for reference.
    nRand = number of random images added
    """
    import pylab as P
    import LLE
    import numpy as np
    import VQutils as VQU
    # compute LLE, goal is 2D
    LLEres = LLE.LLE(codebook.T, nNeighbors, 2)
    # plot that result
    P.plot(LLEres[0, :], LLEres[1, :], '.')
    P.hold(True)
    # prepare to plot
    patch_size = codebook[0, :].size / 12
    # add random
    for k in range(nRand):
        idx = np.random.randint(LLEres.shape[1])
        add_image(P, codebook[idx, :].reshape(12, patch_size),
                  LLEres[0, idx], LLEres[1, idx], .08)
    # plot extreme left codebook
    idx = np.argmin(LLEres[0, :])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # plot extreme right codebook
    idx = np.argmax(LLEres[0, :])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # plot extreme up codebook
    idx = np.argmax(LLEres[1, :])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # plot extreme down codebook
    idx = np.argmin(LLEres[1, :])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # plot middle codebook
    idx = np.argmin([VQU.euclidean_dist(r, np.zeros(2)) for r in LLEres.T])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # done, release, show
    P.hold(False)
    P.show()

def test_align_one_song(filename, codebook):
    """
    Experiment on how well we can find the alignment of a song.
    Designed for a codebook of pSize=4, bars=1.
    If the song has bars that do not last 4 beats, problem.
    Return value:
      - -1 if the test could not be performed
      - 0 if the test was successful
      - 1-2-3: by how many beats we missed
    """
    import scipy
    import scipy.io
    import numpy as np
    import feats_utils as FU
    import VQutils as VQU
    mat = scipy.io.loadmat(filename)
    btstart = mat['btstart']
    barstart = mat['barstart']
    try:
        btstart = btstart.flatten()
        barstart = barstart.flatten()
        if btstart.shape[0] < 3 or barstart.shape[0] < 3:
            return -1  # can not complete
    except IndexError:
        print 'index error'
        return -1  # can not complete
    except AttributeError:
        return -1  # can not complete
    # find bar starts based on beat index
    barstart_idx = [np.where(btstart == x)[0][0] for x in barstart]
    barstart_idx.append(btstart.shape[0])
    # find bar lengths
    barlengths = np.diff(barstart_idx)
    # find bars that do not last 4 beats
    not4 = np.where(barlengths != 4)[0]
    not4 = np.concatenate([[0], not4, [len(barlengths)]])
    # find longest sequence of bars of length 4 beats
    seqs_of_4 = np.diff(not4)
    if len(not4) > 1:
        longest_seq_length = np.max(seqs_of_4) - 1
    else:
        longest_seq_length = not4[0]
    if longest_seq_length < 10:  # why 10? bof....
        #print 'return because longest seq has length:',longest_seq_length
        return -1  # can not complete
    # find best seq pos
    pos1 = not4[np.argmax(seqs_of_4)] + 1
    pos2 = not4[np.argmax(seqs_of_4) + 1]
    # longest sequence should be in range(pos1,pos2)
    # sanity checks
    assert pos2 - pos1 == longest_seq_length
    for k in range(pos1, pos2):
        assert barlengths[k] == 4
    # position in beats
    beat_pos_1 = barstart_idx[pos1]
    beat_pos_2 = beat_pos_1 + 4 * longest_seq_length
    assert beat_pos_2 == btstart.shape[0] or beat_pos_2 in barstart_idx
    # load actual beat features
    btchroma = mat['btchroma']
    # try everything: offset 0 to 3
    best_offset = -1
    best_avg_dist = np.inf
    for offset in range(4):
        avg_dist = 0
        for baridx in range(longest_seq_length - 1):
            pos = beat_pos_1 + offset + baridx * 4
            feats = btchroma[:, pos:pos + 4]
            featsNorm = FU.normalize_pattern_maxenergy(feats, newsize=4,
                                                       keyinvariant=True,
                                                       downbeatinvariant=False)
            # measure with codebook
            tmp, dists = VQU.encode_oneiter(featsNorm.flatten(), codebook)
            avg_dist += (dists[0] * dists[0]) * 1. / featsNorm.size
        #print 'avg_dist=',avg_dist,'for offset',offset
        if best_avg_dist > avg_dist:
            best_avg_dist = avg_dist
            best_offset = offset
    # done, return offset, which is 0 if the alignment is right
    return best_offset

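# Sketch of an aggregate alignment test over a list of files (the file list
# is an assumption): a histogram of offsets, where 0 means the alignment was
# recovered and -1 means the song could not be tested.
def demo_align_stats(filenames, codebook):
    """ Return a dict: offset -> count, over all files """
    counts = {-1: 0, 0: 0, 1: 0, 2: 0, 3: 0}
    for f in filenames:
        counts[test_align_one_song(f, codebook)] += 1
    nTested = sum([counts[k] for k in (0, 1, 2, 3)])
    if nTested > 0:
        print 'alignment recovered on', counts[0] * 1. / nTested, 'of testable songs'
    return counts
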
def knn_from_freqs_on_artists(filenames, codebook, pSize=8, keyInv=True,
                              downBeatInv=False, bars=2, normalize=True,
                              confMatrix=True, use_l0_dist=False,
                              use_artists=False):
    """
    Performs a leave-one-out experiment where we try to guess the artist
    of a song from its nearest neighbors in code frequencies.
    We use squared euclidean distance.
    filenames are expected to be: */artist/album/*.mat
    if confMatrix=True, plot it.
    if use_artists, songs are matched to artists, not to other songs
    RETURNS:
      - confusion matrix
      - freqs per file
      - artist per file
    """
    import numpy as np
    import os
    import VQutils as VQU
    import time
    import copy
    nCodes = codebook.shape[0]
    # get frequencies for all songs
    tstart = time.time()
    freqs = freqs_my_songs(filenames, codebook, pSize=pSize, keyInv=keyInv,
                           downBeatInv=downBeatInv, bars=bars,
                           normalize=normalize)
    print 'all frequencies computed in', (time.time() - tstart), 'seconds.'
    # get artists for all songs
    artists = []
    for f in filenames:
        tmp, song = os.path.split(f)
        tmp, album = os.path.split(tmp)
        tmp, artist = os.path.split(tmp)
        artists.append(artist)
    artists = np.array(artists)
    # names of artists
    artist_names = np.unique(np.sort(artists))
    nArtists = artist_names.shape[0]
    # sanity check
    assert len(filenames) == len(artists)
    # compute distance between all songs
    nFiles = len(filenames)
    tstart = time.time()
    if not use_artists:
        dists = np.zeros([nFiles, nFiles])
        for l in range(nFiles):
            for c in range(l + 1, nFiles):
                if len(freqs[l]) == 0 or len(freqs[c]) == 0:
                    dists[l, c] = np.inf
                    dists[c, l] = np.inf
                    continue
                if use_l0_dist:
                    dists[l, c] = l0_dist(freqs[l], freqs[c])
                else:
                    dists[l, c] = VQU.euclidean_dist(freqs[l], freqs[c])
                dists[c, l] = dists[l, c]
        for l in range(nFiles):  # fill diag with inf
            dists[l, l] = np.inf
    else:
        # create a matrix songs * nArtists
        dists = np.zeros([nFiles, nArtists])
        # precompute cntArtists and artistFreqs, not normalized
        cntArtists = {}
        artistFreqs = {}
        for k in artist_names:
            cntArtists[k] = 0
            artistFreqs[k] = np.zeros([1, nCodes])
        for k in range(artists.shape[0]):
            art = artists[k]
            cntArtists[art] += 1
            artistFreqs[art] += freqs[k]
        # iterate over files
        for l in range(nFiles):
            currArtist = artists[l]
            currCntArtists = copy.deepcopy(cntArtists)
            currCntArtists[currArtist] -= 1
            currArtistFreqs = copy.deepcopy(artistFreqs)
            currArtistFreqs[currArtist] -= freqs[l]
            for k in currArtistFreqs.keys():  # normalize
                currArtistFreqs[k] *= 1. / currCntArtists[k]
            # fill in the line in dists
            for c in range(nArtists):
                art = artist_names[c]
                if use_l0_dist:
                    dists[l, c] = l0_dist(freqs[l], currArtistFreqs[art])
                else:
                    dists[l, c] = VQU.euclidean_dist(freqs[l],
                                                     currArtistFreqs[art])
    print 'distances computed in', (time.time() - tstart), 'seconds.'
    # confusion matrix
    confMat = np.zeros([nArtists, nArtists])
    # perform leave-one-out KNN
    nExps = 0
    nGood = 0
    randScore = 0  # sums prob of having it right by luck, must divide by nExps
    for songid in range(nFiles):
        if len(freqs[songid]) == 0:
            continue
        # get close matches ordered, keep only those at finite distance
        orderedMatches = np.argsort(dists[songid, :])
        orderedMatches = orderedMatches[np.where(
            dists[songid, orderedMatches] != np.inf)]
        # artist
        artist = artists[songid]
        nMatches = orderedMatches.shape[0]
        if use_artists:
            assert nMatches == nArtists
        # get stats
        nExps += 1
        if not use_artists:
            nGoodMatches = np.where(
                artists[orderedMatches] == artist)[0].shape[0]
            if nGoodMatches == 0:
                continue
            randScore += nGoodMatches * 1. / nMatches
            pred_artist = artists[orderedMatches[0]]
        else:
            randScore += 1. / nArtists
            pred_artist = artist_names[orderedMatches[0]]
        if pred_artist == artist:
            nGood += 1
        # fill confusion matrix
        real_artist_id = np.where(artist_names == artist)[0][0]
        pred_artist_id = np.where(artist_names == pred_artist)[0][0]
        print songid, ') real artist:', artist, 'id=', real_artist_id, ', pred artist:', pred_artist, 'id=', pred_artist_id
        confMat[real_artist_id, pred_artist_id] += 1
    # done, print out
    print 'nExps:', nExps
    print 'rand accuracy:', (randScore * 1. / nExps)
    print 'accuracy:', (nGood * 1. / nExps)
    # plot confusion matrix
    if confMatrix:
        short_names = np.array([x[:2] for x in artist_names])
        import pylab as P
        P.imshow(confMat, interpolation='nearest', cmap=P.cm.gray_r,
                 origin='lower')
        P.yticks(P.arange(artist_names.shape[0]), list(artist_names))
        P.xticks(P.arange(artist_names.shape[0]), list(short_names))
        P.title('confusion matrix (real/predicted)')
        P.ylabel('TRUE')
        P.xlabel('RECOG')
        P.colorbar()
    # return confusion matrix
    return confMat, freqs, artists

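# Typical call for the leave-one-out artist experiment above; dataDir is a
# hypothetical root folder laid out as */artist/album/*.mat, per the
# docstring of knn_from_freqs_on_artists.
def demo_artist_knn(dataDir, codebook):
    """ Run the leave-one-out artist experiment on a feature tree """
    import glob
    import os
    files = glob.glob(os.path.join(dataDir, '*', '*', '*.mat'))
    return knn_from_freqs_on_artists(files, codebook, normalize=True,
                                     confMatrix=True, use_artists=True)
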
def do_experiment(experiment_dir, beats, bars, nCodes, nSamples=0,
                  useFirsts=False, seed=0, offset=0, partialbar=1,
                  keyinv=True):
    """
    Performs an independent experiment: train a codebook on the training
    features, encode the test features, and write a report.
    """
    import os
    import numpy as np
    import scipy as sp
    import scipy.io
    # featsDir, testFeatsDir, demos and VQutils are expected to be
    # available at module level
    try:
        os.makedirs(experiment_dir)
    except OSError:
        pass
    np.random.seed(seed)
    args = dict(experiment_dir=experiment_dir, beats=beats, bars=bars,
                nCodes=nCodes, nSamples=nSamples, useFirsts=useFirsts,
                seed=seed, offset=offset, partialbar=partialbar,
                keyinv=keyinv)
    sp.io.savemat(os.path.join(experiment_dir, 'args.mat'), args)
    # TRAINING
    # go to the folder of features (per beat)
    os.chdir(featsDir)
    if not os.path.exists(os.path.join(experiment_dir, 'codebook.mat')):
        # load everything, unit: 'bars' bars, resized to 'beats' beats
        # key invariant, not downbeat invariant
        featsNorm = demos.get_data_maxener(pSize=beats, keyInv=keyinv,
                                           downBeatInv=False, bars=bars,
                                           offset=offset,
                                           partialbar=partialbar)
        # select nSamples random samples out of it
        if nSamples == 0:
            nSamples = featsNorm.shape[0]
        if useFirsts:
            featsNorm = featsNorm[:nSamples]
            np.random.shuffle(featsNorm)
        else:
            r = range(featsNorm.shape[0])
            np.random.shuffle(r)
            featsNorm = featsNorm[r[:nSamples]]
        # train a codebook of size nCodes
        codebook, dists = VQutils.online_vq(featsNorm, nCodes, lrate=1e-2,
                                            nIter=200)
        sp.io.savemat(os.path.join(experiment_dir, 'codebook.mat'),
                      dict(codebook=codebook, dists=dists))
        del featsNorm
    else:
        mat = sp.io.loadmat(os.path.join(experiment_dir, 'codebook.mat'))
        codebook = mat['codebook']
        dists = mat['dists']
    # TESTING
    # go to the folder of test features (per beat)
    os.chdir(testFeatsDir)
    # load and test
    dists, avg_dists = demos.load_and_encode_data(codebook, pSize=beats,
                                                  keyInv=keyinv,
                                                  downBeatInv=False,
                                                  bars=bars)
    sp.io.savemat(os.path.join(experiment_dir, 'test.mat'),
                  dict(dists=dists, avg_dists=avg_dists))
    # report result (average square distance per... pixel?)
    # with print outs to know what we are doing
    report = ['EXPERIMENT REPORT ******************************',
              'beats: %s , bars: %s , nCodes: %s , nSamples: %s , offset: %s , partialbar: %s' % (beats, bars, nCodes, nSamples, offset, partialbar)]
    if useFirsts:
        report.append('we use the first %s samples' % nSamples)
    if not keyinv:
        report.append('not key invariant!')
    report.extend(['np.average(avg_dists): %s' % np.average(avg_dists),
                   '************************************************', ''])
    reportstr = '\n'.join(report)
    print reportstr
    f = open(os.path.join(experiment_dir, 'report.txt'), 'w')
    f.write(reportstr)
    f.close()

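# Sketch of how experiments could be batched (the directory naming and the
# parameter grid are assumptions): one do_experiment call per setting, each
# writing its own args.mat / codebook.mat / test.mat / report.txt.
def demo_experiment_grid(rootdir):
    """ Run a small grid of independent experiments """
    import os
    for bars, beats in [(1, 4), (2, 8)]:
        for nCodes in [100, 500]:
            d = os.path.join(rootdir, 'bars%d_codes%d' % (bars, nCodes))
            do_experiment(d, beats=beats, bars=bars, nCodes=nCodes)
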