def get_data_maxener(pSize=4, keyInv=True, downBeatInv=False, bars=1,
                     partialbar=1, offset=0):
    """
    Utility function: gather all beat-chroma patterns from the .mat files
    under the current directory, normalize each one by maximum energy,
    and remove the empty patterns.
    INPUT:
      - pSize       (default: 4)
      - keyInv      (default: True)
      - downBeatInv (default: False)
      - bars        (default: 1)
      - partialbar  (default: 1)
      - offset      (default: 0)
    """
    import data_iterator
    import feats_utils as FU
    import numpy as np
    import time
    # start time
    tstart = time.time()
    # get matlab files
    allfiles = FU.get_all_matfiles('.')
    print len(allfiles), ' .mat files found'
    # create and set iterator
    data_iter = data_iterator.DataIterator()
    data_iter.setMatfiles(allfiles)          # set matfiles
    if bars > 0:
        data_iter.useBars(bars)              # a pattern spans 'bars' bars
        if partialbar < 1:
            assert bars == 1
            data_iter.usePartialBar(partialbar)
    else:
        data_iter.useBars(0)                 # important to set it to zero!
        data_iter.setFeatsize(pSize)         # a pattern is a num. of beats
    if offset > 0:
        data_iter.setOffset(offset)
    data_iter.stopAfterOnePass(True)         # stop after one full iteration
    # get features
    featsNorm = [FU.normalize_pattern_maxenergy(p, pSize, keyInv,
                                                downBeatInv).flatten()
                 for p in data_iter]
    print 'found ', len(featsNorm), ' patterns before removing empty ones'
    # make it an array
    featsNorm = np.array(featsNorm)
    # remove empty patterns
    res = [np.sum(r) > 0 for r in featsNorm]
    res2 = np.where(res)
    featsNorm = featsNorm[res2]
    # time?
    print 'all patterns acquired and normalized in ' + str(time.time() - tstart) + ' seconds'
    print 'featsNorm.shape = ', featsNorm.shape
    return featsNorm
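# Minimal usage sketch (not part of the original code; assumes the script is
# run from the root of a directory tree holding the beat-chroma .mat files).
# Each row of the returned array is one flattened 12 x pSize pattern:
#
#   feats = get_data_maxener(pSize=8, keyInv=True, downBeatInv=False, bars=2)
#   print feats.shape                      # (num_patterns, 12 * 8)
#   pattern0 = feats[0].reshape(12, 8)     # back to a chroma-by-beat matrix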
def encode_one_song(filename, codebook, pSize=8, keyInv=True,
                    downBeatInv=False, bars=2):
    """
    Encode one song with the given codebook.
    RETURNS: best code index per pattern, song patterns, encoding,
             song as one matrix, encoding as one matrix.
    The two matrices are 'derolled' (key and downbeat rotations undone).
    """
    import feats_utils as FU
    import numpy as np
    import data_iterator
    import VQutils
    # create data iterator
    data_iter = data_iterator.DataIterator()
    data_iter.setMatfiles([filename])        # set matfiles
    if bars > 0:
        data_iter.useBars(bars)              # a pattern spans 'bars' bars
    else:
        data_iter.useBars(0)                 # important to set it to zero!
        data_iter.setFeatsize(pSize)         # a pattern is a num. of beats
    data_iter.stopAfterOnePass(True)
    # load data, keeping the key and downbeat rolls of each pattern
    featsNorm = [FU.normalize_pattern_maxenergy(p, pSize, keyInv, downBeatInv,
                                                retRoll=True)
                 for p in data_iter]
    keyroll = np.array([x[1] for x in featsNorm])
    dbroll = np.array([x[2] for x in featsNorm])
    featsNorm = [x[0].flatten() for x in featsNorm]
    if len(featsNorm) == 0:  # empty song
        return [], [], [], [], []
    featsNorm = np.array(featsNorm)
    # remove empty patterns, keeping the rolls in sync
    res = [np.sum(r) > 0 for r in featsNorm]
    res2 = np.where(res)
    featsNorm = featsNorm[res2]
    keyroll = keyroll[res2]
    dbroll = dbroll[res2]
    assert dbroll.shape[0] == keyroll.shape[0]
    assert dbroll.shape[0] == featsNorm.shape[0]
    # find code per pattern
    best_code_per_p, dists, avg_dists = VQutils.find_best_code_per_pattern(
        featsNorm, codebook)
    best_code_per_p = np.asarray([int(x) for x in best_code_per_p])
    assert best_code_per_p.shape[0] > 0, 'empty song, we should have caught that'
    encoding = codebook[best_code_per_p]
    # transform into 2 matrices, with derolling
    assert featsNorm.shape[0] == encoding.shape[0]
    # non-derolled alternative:
    #featsNormMAT = np.concatenate([x.reshape(12,pSize) for x in featsNorm],axis=1)
    featsNormMAT = np.concatenate(
        [np.roll(np.roll(featsNorm[x].reshape(12, pSize), -keyroll[x], axis=0),
                 -dbroll[x], axis=1)
         for x in range(featsNorm.shape[0])], axis=1)
    # non-derolled alternative:
    #encodingMAT = np.concatenate([x.reshape(12,pSize) for x in encoding],axis=1)
    encodingMAT = np.concatenate(
        [np.roll(np.roll(encoding[x].reshape(12, pSize), -keyroll[x], axis=0),
                 -dbroll[x], axis=1)
         for x in range(featsNorm.shape[0])], axis=1)
    # return
    return best_code_per_p, featsNorm, encoding, featsNormMAT, encodingMAT
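# Minimal usage sketch (not part of the original code; 'song.mat' is a
# hypothetical beat-chroma file and 'codebook' an existing (K, 12*pSize)
# array of codewords, e.g. produced by a prior VQ training run):
#
#   import numpy as np
#   codes, song, enc, songMAT, encMAT = encode_one_song('song.mat', codebook,
#                                                       pSize=8, bars=2)
#   if len(codes) > 0:
#       err = np.average(np.square(songMAT - encMAT))  # per-entry squared error
#       print 'distinct codes used:', np.unique(codes).shape[0],
#       print '| avg squared error:', err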
def test_align_one_song(filename, codebook):
    """
    Experiment: how well can we recover the bar alignment of a song?
    Designed for a codebook of pSize=4, bars=1.
    Songs containing bars that do not last 4 beats are problematic.
    RETURN:
      - -1 if the test could not be performed
      -  0 if the test was successful
      -  1, 2 or 3: by how many beats we missed
    """
    import scipy
    import scipy.io
    import numpy as np
    import feats_utils as FU
    import VQutils as VQU
    mat = scipy.io.loadmat(filename)
    btstart = mat['btstart']
    barstart = mat['barstart']
    try:
        btstart = btstart.flatten()
        barstart = barstart.flatten()
        if btstart.shape[0] < 3 or barstart.shape[0] < 3:
            return -1  # cannot complete test
    except IndexError:
        print 'index error'
        return -1  # cannot complete test
    except AttributeError:
        return -1  # cannot complete test
    # find bar starts expressed as beat indices
    barstart_idx = [np.where(btstart == x)[0][0] for x in barstart]
    barstart_idx.append(btstart.shape[0])
    # find bar lengths (in beats)
    barlengths = np.diff(barstart_idx)
    # find bars that are not 4 beats long
    not4 = np.where(barlengths != 4)[0]
    not4 = np.concatenate([[0], not4, [len(barlengths)]])
    # find longest sequence of 4-beat bars
    seqs_of_4 = np.diff(not4)
    if len(not4) > 1:
        longest_seq_length = np.max(seqs_of_4) - 1
    else:
        longest_seq_length = not4[0]
    if longest_seq_length < 10:  # arbitrary threshold; why 10?
        #print 'return because longest seq has length:',longest_seq_length
        return -1  # cannot complete test
    # find best sequence position
    pos1 = not4[np.argmax(seqs_of_4)] + 1
    pos2 = not4[np.argmax(seqs_of_4) + 1]
    # longest sequence should be in range(pos1,pos2)
    # sanity checks
    assert pos2 - pos1 == longest_seq_length
    for k in range(pos1, pos2):
        assert barlengths[k] == 4
    # position in beats
    beat_pos_1 = barstart_idx[pos1]
    beat_pos_2 = beat_pos_1 + 4 * longest_seq_length
    assert beat_pos_2 == btstart.shape[0] or \
        np.where(np.asarray(barstart_idx) == beat_pos_2)[0].shape[0] > 0
    # load actual beat features
    btchroma = mat['btchroma']
    # try every offset from 0 to 3
    best_offset = -1
    best_avg_dist = np.inf
    for offset in range(4):
        avg_dist = 0
        for baridx in range(longest_seq_length - 1):
            pos = beat_pos_1 + offset + baridx * 4
            feats = btchroma[:, pos:pos + 4]
            featsNorm = FU.normalize_pattern_maxenergy(feats, newsize=4,
                                                       keyinvariant=True,
                                                       downbeatinvariant=False)
            # measure with codebook
            tmp, dists = VQU.encode_oneiter(featsNorm.flatten(), codebook)
            avg_dist += (dists[0] * dists[0]) * 1. / featsNorm.size
        #print 'avg_dist=',avg_dist,'for offset',offset
        if best_avg_dist > avg_dist:
            best_avg_dist = avg_dist
            best_offset = offset
    # done, return offset, which is 0 if the alignment is correct
    return best_offset
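# Minimal usage sketch (not part of the original code; assumes 'allfiles' is a
# list of beat-chroma .mat paths, e.g. from feats_utils.get_all_matfiles('.'),
# and 'codebook' was trained with pSize=4, bars=1 as the docstring requires):
#
#   import numpy as np
#   results = np.array([test_align_one_song(f, codebook) for f in allfiles])
#   usable = results[results >= 0]           # drop songs we could not test
#   print 'tested', usable.shape[0], 'songs,',
#   print 'exact alignment (offset 0) on', np.sum(usable == 0)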
# (excerpt from a larger test script: tmpfilemat, online_feats, np, P = pylab
#  and plotall are defined earlier in that script)
# features from matfile
mat_feats = features.features_from_matfile(tmpfilemat, pSize=8, usebars=2,
                                           keyInv=True, songKeyInv=False,
                                           positive=False, do_resample=True)
mat_feats = mat_feats[np.nonzero(np.sum(mat_feats, axis=1))]
print 'features from matfile computed, shape =', mat_feats.shape
# features from matfile, old-school way (data_iterator)
import data_iterator
import feats_utils as FU
data_iter = data_iterator.DataIterator()
data_iter.setMatfiles([tmpfilemat])
data_iter.useBars(2)
data_iter.stopAfterOnePass(True)
featsNorm = [FU.normalize_pattern_maxenergy(p, 8, True, False).flatten()
             for p in data_iter]
featsNorm = np.array(featsNorm)
res = [np.sum(r) > 0 for r in featsNorm]
res2 = np.where(res)
featsNorm = featsNorm[res2]
print 'features from matfile (old school) computed, shape =', featsNorm.shape
# compare the three feature sets
min_len = min(mat_feats.shape[0], online_feats.shape[0], featsNorm.shape[0])
if mat_feats.shape != online_feats.shape:
    print 'wrong shape between online and mat feats...'
# plot matfile features
P.figure()
plotall([x.reshape(12, mat_feats.shape[1] / 12) for x in mat_feats[:3]],
        interpolation='nearest', aspect='auto', cmap=P.cm.gray_r)
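# Hedged follow-up sketch (assumes the three feature arrays above share the
# same pattern width): compare the first min_len patterns numerically rather
# than only visually.
#
#   diff_mat_online = np.average(np.abs(mat_feats[:min_len] - online_feats[:min_len]))
#   diff_mat_old = np.average(np.abs(mat_feats[:min_len] - featsNorm[:min_len]))
#   print 'avg abs diff, mat vs online     =', diff_mat_online
#   print 'avg abs diff, mat vs old school =', diff_mat_old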