from plottools import plotall import pylab as P sys.path.append('../PythonSrc') import evaluation import masking import imputation if __name__ == '__main__': btchroma145 = sio.loadmat( '/home/thierry/Columbia/covers80/coversongs/covers32k/Caroline_No/beach_boys+Pet_Sounds+13-Caroline_No.mp3.mat' )['btchroma'] mask, masked_cols = masking.random_col_mask(btchroma145, ncols=1, win=30) recon, lt = imputation.lintransform_cols(btchroma145, mask, masked_cols, win=1) pos1 = masked_cols[0] - 7 pos2 = masked_cols[0] + 7 im1 = btchroma145[:, pos1:pos2].copy() im2 = (btchroma145 * mask)[:, pos1:pos2].copy() im3 = recon[:, pos1:pos2].copy() # plot all this fig = P.figure() fig.subplots_adjust(hspace=0.4) blackbarsfun = lambda: P.gca().axvline(linewidth=2, color='0.', x=6.5 ) and P.gca().axvline( linewidth=2, color='0.', x=7.5) plotall([im1, im2, im3], subplot=(3, 1), cmap='gray_r',
def compare_all(btchroma, mask, masked_cols, codebook=None):
    """
    Compare all the algorithms we have so far.
    A lot of parameters hard-coded, but... It will get improved.
    We display:
      1) original beat chromagram
      2) masked beat chromagram
      3) imputation by random
      4) imputation by averaging nearby columns (win=3)
      5) imputation by knn on the rest of the song (eucldist_cols, win=7)
      6) imputation by linear prediction (win=1)
      7) imputation by SIPLCA, twice: rank=4/win=5 and rank=25/win=10
      8) imputation by codebook, if provided
    Every reconstruction is labelled with its 'eucl' error as returned
    by EVAL.recon_error.

    To init mask and masked_cols, something like:
    mask,masked_cols = masking.random_col_mask(btchroma,ncols=1,win=30)

    INPUT
      btchroma     - beat chromagram, sliced here as [:, columns]
      mask         - mask, applied as btchroma * mask
      masked_cols  - masked column indexes; only the first one is used,
                     to position the display window
      codebook     - optional; iterated row by row, each row being
                     reshaped to 12 x (size // 12); must expose .shape
    RETURN
      nothing; a new figure is created and filled through plotall
    """
    # for displaying purposes, show 10 columns: 5 before the first masked
    # column, then the masked column and the 4 after it. The start is
    # clamped to 0 because a negative start would silently wrap the slice.
    center = masked_cols[0]
    pos1 = max(center - 5, 0)
    pos2 = center + 5
    # index of the masked column inside the displayed window
    bar_pos = center - pos1

    allimages = []
    titles = []
    xlabels = []

    def add_image(data, title, xlabel):
        # crop to the display window and register one subplot
        allimages.append(data[:, pos1:pos2].copy())
        titles.append(title)
        xlabels.append(xlabel)

    def add_recon(recon, title):
        # same as add_image, with the euclidean recon error as x label
        err = EVAL.recon_error(btchroma, mask, recon, measure='eucl')
        add_image(recon, title, 'err=' + str(err))

    # 1-2) original beat chroma and masked one
    add_image(btchroma, 'original beat chroma', '')
    add_image(btchroma * mask, 'masked beat chroma', '')
    # 3) random
    recon = IMPUTATION.random_col(btchroma, mask, masked_cols)
    add_recon(recon, 'random')
    # 4) average nearby columns
    recon = IMPUTATION.average_col(btchroma, mask, masked_cols, win=3)
    add_recon(recon, 'average 2 nearby cols')
    # 5) knn
    recon, used_cols = IMPUTATION.eucldist_cols(btchroma, mask, masked_cols,
                                                win=7)
    add_recon(recon, 'knn for the whole song')
    # 6) linear prediction
    recon, proj = IMPUTATION.lintransform_cols(btchroma, mask, masked_cols,
                                               win=1)
    add_recon(recon, 'linear prediction (1 col)')
    # 7) SIPLCA
    res = IMPUTATION_PLCA.SIPLCA_mask.analyze((btchroma * mask).copy(),
                                              4, mask, win=5)
    W, Z, H, norm, recon, logprob = res
    add_recon(recon, 'SIPLCA, rank=4, win=5')
    # 7) SIPLCA, second setting (same SIPLCA_mask class, other rank / win)
    res = IMPUTATION_PLCA.SIPLCA_mask.analyze((btchroma * mask).copy(),
                                              25, mask, win=10)
    W, Z, H, norm, recon, logprob = res
    add_recon(recon, 'SIPLCA, rank=25, win=10')
    # 8) codebook
    # 'is not None': '!= None' on an ndarray is an elementwise comparison
    # whose truth value is ambiguous.
    if codebook is not None:
        # '//' keeps the reshape dimension an int under true division too
        cb = [p.reshape(12, p.size // 12) for p in codebook]
        recon, used_codes = IMPUTATION.codebook_cols(btchroma, mask,
                                                     masked_cols, cb)
        add_recon(recon,
                  'codebook, ' + str(codebook.shape[0]) +
                  ' codes of length ' + str(codebook.shape[1] // 12))

    # ALL IMAGES CREATED
    fig = plt.figure()
    fig.subplots_adjust(hspace=0.4)

    def blackbarsfun():
        # two black vertical bars framing the masked column; the second
        # line is returned, as the original 'a and b' lambda did
        axes = plt.gca()
        axes.axvline(linewidth=2, color='0.', x=bar_pos - 0.5)
        return axes.axvline(linewidth=2, color='0.', x=bar_pos + 0.5)

    # plotall
    plotall(allimages, subplot=(3, 3), cmap='gray_r', title=titles,
            xlabel=xlabels, axvlines=blackbarsfun, colorbar=False)
def test_maskedcol_on_dataset(datasetdir, method='random', ncols=1, win=3,
                              rank=4, codebook=None, **kwargs):
    """
    General method to test a method on a whole dataset for one masked column
    Methods are:
      - random
      - randomfromsong
      - average
      - codebook
      - knn_eucl
      - knn_kl
      - lintrans
      - siplca
      - siplca2
    Used arguments vary based on the method. For SIPLCA, we can use **kwargs
    to set priors.

    INPUT
      datasetdir  - passed to get_all_matfiles to list the files to test
      method      - one of the names above; an unknown name is reported
                    and aborts the run on the first usable song
      ncols       - number of columns to mask, passed to random_col_mask
      win         - window passed to average / knn / lintrans / siplca(2)
      rank        - rank passed to siplca / siplca2
      codebook    - list of patches, or an array whose rows get reshaped to
                    12 x (codebook.shape[1] // 12); used by 'codebook'
      kwargs      - forwarded to the SIPLCA analyze() call
    RETURN
      nothing; song count and mean (std) of both error measures are printed
    """
    # get all matfiles
    matfiles = get_all_matfiles(datasetdir)
    # init
    total_cnt = 0
    errs_eucl = []
    errs_kl = []
    # some specific inits
    # 'is not None': '!= None' on an ndarray is an elementwise comparison
    # whose truth value is ambiguous.
    if codebook is not None and not isinstance(codebook, list):
        # '//' keeps the reshape dimension an int under true division too
        patch_len = codebook.shape[1] // 12
        codebook = [p.reshape(12, patch_len) for p in codebook]
        print('codebook in ndarray format transformed to list')
    # iterate
    for matfile in matfiles:
        btchroma = sio.loadmat(matfile)['btchroma']
        # skip songs that are not 2-D, too short, or contain NaN
        if len(btchroma.shape) < 2:
            continue
        if btchroma.shape[1] < MINLENGTH or np.isnan(btchroma).any():
            continue
        mask, masked_cols = MASKING.random_col_mask(btchroma, ncols=ncols,
                                                    win=25)
        ########## ALGORITHM DEPENDENT
        if method == 'random':
            recon = IMPUTATION.random_col(btchroma, mask, masked_cols)
        elif method == 'randomfromsong':
            recon = IMPUTATION.random_col_from_song(btchroma, mask,
                                                    masked_cols)
        elif method == 'average':
            recon = IMPUTATION.average_col(btchroma, mask, masked_cols,
                                           win=win)
        elif method == 'codebook':
            recon, used_codes = IMPUTATION.codebook_cols(
                btchroma, mask, masked_cols, codebook)
        elif method == 'knn_eucl':
            recon, used_cols = IMPUTATION.knn_cols(
                btchroma, mask, masked_cols, win=win, measure='eucl')
        elif method == 'knn_kl':
            recon, used_cols = IMPUTATION.knn_cols(
                btchroma, mask, masked_cols, win=win, measure='kl')
        elif method == 'lintrans':
            recon, proj = IMPUTATION.lintransform_cols(
                btchroma, mask, masked_cols, win=win)
        elif method == 'siplca':
            res = IMPUTATION_PLCA.SIPLCA_mask.analyze(
                (btchroma * mask).copy(), rank, mask, win=win,
                convergence_thresh=1e-15, **kwargs)
            W, Z, H, norm, recon, logprob = res
        elif method == 'siplca2':
            res = IMPUTATION_PLCA.SIPLCA2_mask.analyze(
                (btchroma * mask).copy(), rank, mask, win=win,
                convergence_thresh=1e-15, **kwargs)
            W, Z, H, norm, recon, logprob = res
        else:
            print('unknown method: %s' % (method,))
            return
        ########## ALGORITHM DEPENDENT END
        # measure recon, once per error measure, flagging suspicious values
        for measure, label, errs in (('eucl', 'EUCL', errs_eucl),
                                     ('kl', 'KL', errs_kl)):
            err = recon_error(btchroma, mask, recon, measure=measure)
            if err > 100:
                print('huge %s error: %s , method = %s ,file = %s'
                      % (label, err, method, matfile))
            errs.append(err)
        total_cnt += 1
    # done
    # single-argument print(...) emits the same text on Python 2 and 3
    print('number of songs tested: %s' % (total_cnt,))
    print('average sq euclidean dist: %s ( %s )'
          % (np.mean(errs_eucl), np.std(errs_eucl)))
    print('average kl divergence: %s ( %s )'
          % (np.mean(errs_kl), np.std(errs_kl)))
def test_maskedcol_on_dataset(datasetdir, method='random', ncols=1, win=3,
                              rank=4, codebook=None, **kwargs):
    """
    General method to test a method on a whole dataset for one masked column
    Methods are:
      - random
      - randomfromsong
      - average
      - codebook
      - knn_eucl
      - knn_kl
      - lintrans
      - siplca
      - siplca2
    Used arguments vary based on the method. For SIPLCA, we can use **kwargs
    to set priors.

    INPUT
      datasetdir  - passed to get_all_matfiles to list the files to test
      method      - one of the names above; an unknown name is reported
                    and aborts the run on the first usable song
      ncols       - number of columns to mask, passed to random_col_mask
      win         - window passed to average / knn / lintrans / siplca(2)
      rank        - rank passed to siplca / siplca2
      codebook    - list of patches, or an array whose rows get reshaped to
                    12 x (codebook.shape[1] // 12); used by 'codebook'
      kwargs      - forwarded to the SIPLCA analyze() call
    RETURN
      nothing; song count and mean (std) of both error measures are printed
    """
    # get all matfiles
    matfiles = get_all_matfiles(datasetdir)
    # init
    total_cnt = 0
    errs_eucl = []
    errs_kl = []
    # some specific inits
    # 'is not None': '!= None' on an ndarray is an elementwise comparison
    # whose truth value is ambiguous.
    if codebook is not None and not isinstance(codebook, list):
        # '//' keeps the reshape dimension an int under true division too
        patch_len = codebook.shape[1] // 12
        codebook = [p.reshape(12, patch_len) for p in codebook]
        print('codebook in ndarray format transformed to list')
    # iterate
    for matfile in matfiles:
        btchroma = sio.loadmat(matfile)['btchroma']
        # skip songs that are not 2-D, too short, or contain NaN
        if len(btchroma.shape) < 2:
            continue
        if btchroma.shape[1] < MINLENGTH or np.isnan(btchroma).any():
            continue
        mask, masked_cols = MASKING.random_col_mask(btchroma, ncols=ncols,
                                                    win=25)
        ########## ALGORITHM DEPENDENT
        if method == 'random':
            recon = IMPUTATION.random_col(btchroma, mask, masked_cols)
        elif method == 'randomfromsong':
            recon = IMPUTATION.random_col_from_song(btchroma, mask,
                                                    masked_cols)
        elif method == 'average':
            recon = IMPUTATION.average_col(btchroma, mask, masked_cols,
                                           win=win)
        elif method == 'codebook':
            recon, used_codes = IMPUTATION.codebook_cols(
                btchroma, mask, masked_cols, codebook)
        elif method == 'knn_eucl':
            recon, used_cols = IMPUTATION.knn_cols(
                btchroma, mask, masked_cols, win=win, measure='eucl')
        elif method == 'knn_kl':
            recon, used_cols = IMPUTATION.knn_cols(
                btchroma, mask, masked_cols, win=win, measure='kl')
        elif method == 'lintrans':
            recon, proj = IMPUTATION.lintransform_cols(
                btchroma, mask, masked_cols, win=win)
        elif method == 'siplca':
            res = IMPUTATION_PLCA.SIPLCA_mask.analyze(
                (btchroma * mask).copy(), rank, mask, win=win,
                convergence_thresh=1e-15, **kwargs)
            W, Z, H, norm, recon, logprob = res
        elif method == 'siplca2':
            res = IMPUTATION_PLCA.SIPLCA2_mask.analyze(
                (btchroma * mask).copy(), rank, mask, win=win,
                convergence_thresh=1e-15, **kwargs)
            W, Z, H, norm, recon, logprob = res
        else:
            print('unknown method: %s' % (method,))
            return
        ########## ALGORITHM DEPENDENT END
        # measure recon, once per error measure, flagging suspicious values
        for measure, label, errs in (('eucl', 'EUCL', errs_eucl),
                                     ('kl', 'KL', errs_kl)):
            err = recon_error(btchroma, mask, recon, measure=measure)
            if err > 100:
                print('huge %s error: %s , method = %s ,file = %s'
                      % (label, err, method, matfile))
            errs.append(err)
        total_cnt += 1
    # done
    # single-argument print(...) emits the same text on Python 2 and 3
    print('number of songs tested: %s' % (total_cnt,))
    print('average sq euclidean dist: %s ( %s )'
          % (np.mean(errs_eucl), np.std(errs_eucl)))
    print('average kl divergence: %s ( %s )'
          % (np.mean(errs_kl), np.std(errs_kl)))
import numpy as np
import scipy.io as sio
from plottools import plotall
import pylab as P
sys.path.append('../PythonSrc')
import evaluation
import masking
import imputation


if __name__ == '__main__':
    # Demo: mask one column of a single song's beat chromagram, impute it
    # with the linear-transform method, and plot original / masked /
    # reconstruction around the masked column.
    btchroma145 = sio.loadmat(
        '/home/thierry/Columbia/covers80/coversongs/covers32k/Caroline_No/beach_boys+Pet_Sounds+13-Caroline_No.mp3.mat'
    )['btchroma']
    mask, masked_cols = masking.random_col_mask(btchroma145, ncols=1, win=30)
    recon, lt = imputation.lintransform_cols(btchroma145, mask, masked_cols,
                                             win=1)

    # 14-column display window; the masked column lands at index 7
    pos1 = masked_cols[0] - 7
    pos2 = masked_cols[0] + 7

    def _window(data):
        # crop to the display window, as an independent copy
        return data[:, pos1:pos2].copy()

    im1 = _window(btchroma145)
    im2 = _window(btchroma145 * mask)
    im3 = _window(recon)

    # plot all this
    fig = P.figure()
    fig.subplots_adjust(hspace=0.4)

    def blackbarsfun():
        # black vertical bars at x=6.5 and x=7.5, on each side of index 7
        return (P.gca().axvline(linewidth=2, color='0.', x=6.5) and
                P.gca().axvline(linewidth=2, color='0.', x=7.5))

    plotall([im1, im2, im3],
            subplot=(3, 1),
            cmap='gray_r',
            title=['original', 'original masked', 'reconstruction'],
            axvlines=blackbarsfun,
            colorbar=False,
            xticks=[()] * 3)
    P.show()
def compare_all(btchroma, mask, masked_cols, codebook=None):
    """
    Compare all the algorithms we have so far.
    A lot of parameters hard-coded, but... It will get improved.
    We display:
      1) original beat chromagram
      2) masked beat chromagram
      3) imputation by random
      4) imputation by averaging nearby columns (win=3)
      5) imputation by knn on the rest of the song (eucldist_cols, win=7)
      6) imputation by linear prediction (win=1)
      7) imputation by SIPLCA, twice: rank=4/win=5 and rank=25/win=10
      8) imputation by codebook, if provided
    Every reconstruction is labelled with its 'eucl' error as returned
    by EVAL.recon_error.

    To init mask and masked_cols, something like:
    mask,masked_cols = masking.random_col_mask(btchroma,ncols=1,win=30)

    INPUT
      btchroma     - beat chromagram, sliced here as [:, columns]
      mask         - mask, applied as btchroma * mask
      masked_cols  - masked column indexes; only the first one is used,
                     to position the display window
      codebook     - optional; iterated row by row, each row being
                     reshaped to 12 x (size // 12); must expose .shape
    RETURN
      nothing; a new figure is created and filled through plotall
    """
    # for displaying purposes, show 10 columns: 5 before the first masked
    # column, then the masked column and the 4 after it. The start is
    # clamped to 0 because a negative start would silently wrap the slice.
    center = masked_cols[0]
    pos1 = max(center - 5, 0)
    pos2 = center + 5
    # index of the masked column inside the displayed window
    bar_pos = center - pos1

    allimages = []
    titles = []
    xlabels = []

    def add_image(data, title, xlabel):
        # crop to the display window and register one subplot
        allimages.append(data[:, pos1:pos2].copy())
        titles.append(title)
        xlabels.append(xlabel)

    def add_recon(recon, title):
        # same as add_image, with the euclidean recon error as x label
        err = EVAL.recon_error(btchroma, mask, recon, measure='eucl')
        add_image(recon, title, 'err=' + str(err))

    # 1-2) original beat chroma and masked one
    add_image(btchroma, 'original beat chroma', '')
    add_image(btchroma * mask, 'masked beat chroma', '')
    # 3) random
    recon = IMPUTATION.random_col(btchroma, mask, masked_cols)
    add_recon(recon, 'random')
    # 4) average nearby columns
    recon = IMPUTATION.average_col(btchroma, mask, masked_cols, win=3)
    add_recon(recon, 'average 2 nearby cols')
    # 5) knn
    recon, used_cols = IMPUTATION.eucldist_cols(btchroma, mask, masked_cols,
                                                win=7)
    add_recon(recon, 'knn for the whole song')
    # 6) linear prediction
    recon, proj = IMPUTATION.lintransform_cols(btchroma, mask, masked_cols,
                                               win=1)
    add_recon(recon, 'linear prediction (1 col)')
    # 7) SIPLCA
    res = IMPUTATION_PLCA.SIPLCA_mask.analyze((btchroma * mask).copy(),
                                              4, mask, win=5)
    W, Z, H, norm, recon, logprob = res
    add_recon(recon, 'SIPLCA, rank=4, win=5')
    # 7) SIPLCA, second setting (same SIPLCA_mask class, other rank / win)
    res = IMPUTATION_PLCA.SIPLCA_mask.analyze((btchroma * mask).copy(),
                                              25, mask, win=10)
    W, Z, H, norm, recon, logprob = res
    add_recon(recon, 'SIPLCA, rank=25, win=10')
    # 8) codebook
    # 'is not None': '!= None' on an ndarray is an elementwise comparison
    # whose truth value is ambiguous.
    if codebook is not None:
        # '//' keeps the reshape dimension an int under true division too
        cb = [p.reshape(12, p.size // 12) for p in codebook]
        recon, used_codes = IMPUTATION.codebook_cols(btchroma, mask,
                                                     masked_cols, cb)
        add_recon(recon,
                  'codebook, ' + str(codebook.shape[0]) +
                  ' codes of length ' + str(codebook.shape[1] // 12))

    # ALL IMAGES CREATED
    fig = plt.figure()
    fig.subplots_adjust(hspace=0.4)

    def blackbarsfun():
        # two black vertical bars framing the masked column; the second
        # line is returned, as the original 'a and b' lambda did
        axes = plt.gca()
        axes.axvline(linewidth=2, color='0.', x=bar_pos - 0.5)
        return axes.axvline(linewidth=2, color='0.', x=bar_pos + 0.5)

    # plotall
    plotall(allimages, subplot=(3, 3), cmap='gray_r', title=titles,
            xlabel=xlabels, axvlines=blackbarsfun, colorbar=False)