from plottools import plotall
import pylab as P

sys.path.append('../PythonSrc')
import evaluation
import masking
import imputation

if __name__ == '__main__':

    # Demo script: mask one column of a beat chromagram, impute it with the
    # linear-transform method and plot original / masked / reconstruction.
    # NOTE(review): the input path is hard-coded to the author's machine.
    btchroma145 = sio.loadmat(
        '/home/thierry/Columbia/covers80/coversongs/covers32k/Caroline_No/beach_boys+Pet_Sounds+13-Caroline_No.mp3.mat'
    )['btchroma']
    mask, masked_cols = masking.random_col_mask(btchroma145, ncols=1, win=30)
    recon, lt = imputation.lintransform_cols(btchroma145,
                                             mask,
                                             masked_cols,
                                             win=1)
    # crop a 14-column window; the first masked column lands at index 7
    pos1 = masked_cols[0] - 7
    pos2 = masked_cols[0] + 7
    im1 = btchroma145[:, pos1:pos2].copy()
    im2 = (btchroma145 * mask)[:, pos1:pos2].copy()
    im3 = recon[:, pos1:pos2].copy()
    # plot all this
    fig = P.figure()
    fig.subplots_adjust(hspace=0.4)
    # two vertical black bars at x=6.5 and x=7.5, i.e. on both sides of the
    # masked column (index 7 in the cropped images)
    blackbarsfun = lambda: P.gca().axvline(linewidth=2, color='0.', x=6.5
                                           ) and P.gca().axvline(
                                               linewidth=2, color='0.', x=7.5)
    # the call below was cut off after `cmap` in this copy; the remaining
    # arguments and the final show() are the ones used by the complete copy
    # of this same script
    plotall([im1, im2, im3],
            subplot=(3, 1),
            cmap='gray_r',
            title=['original', 'original masked', 'reconstruction'],
            axvlines=blackbarsfun,
            colorbar=False,
            xticks=[()] * 3)
    P.show()
def compare_all(btchroma, mask, masked_cols, codebook=None):
    """
    Compare all the algorithms we have so far.
    A lot of parameters hard-coded, but...
    It will get improved.
    We display (one subplot each, on a 3x3 grid):
      1) original beat chromagram
      2) masked beat chromagram
      3) imputation by random
      4) imputation by averaging nearby columns
      5) imputation by knn on the rest of the song
      6) imputation by linear prediction
      7) imputation by SIPLCA (rank=4, win=5)
      8) imputation by SIPLCA (rank=25, win=10)
      9) imputation by codebook, if provided

    Every reconstruction panel is labelled with its error as returned by
    EVAL.recon_error(..., measure='eucl').

    INPUT
      btchroma     - beat chromagram (2D array, indexed [:, column])
      mask         - array multiplied elementwise with btchroma to hide
                     the masked columns
      masked_cols  - indices of the masked columns; only the first one is
                     used to position the display window
      codebook     - optional, either a 2D ndarray with one flattened code
                     per row or a list of such codes; each code is reshaped
                     to 12 rows before use

    Returns nothing, the figure is created through plt / plotall.

    To init mask and masked_cols, something like:
    mask,masked_cols = masking.random_col_mask(btchroma,ncols=1,win=30)
    """
    # for displaying purposes: a 10-column window in which the first
    # masked column sits at index 5
    pos1 = masked_cols[0] - 5
    pos2 = masked_cols[0] + 5
    allimages = []
    titles = []
    xlabels = []

    def add_panel(image, title, xlabel):
        # crop to the display window and queue the panel for plotting
        allimages.append(image[:, pos1:pos2].copy())
        titles.append(title)
        xlabels.append(xlabel)

    def add_recon(recon, title):
        # queue a reconstruction, labelled with its error
        err = EVAL.recon_error(btchroma, mask, recon, measure='eucl')
        add_panel(recon, title, 'err=' + str(err))

    # original beat chroma and masked one
    add_panel(btchroma, 'original beat chroma', '')
    add_panel(btchroma * mask, 'masked beat chroma', '')
    # 3) random
    recon = IMPUTATION.random_col(btchroma, mask, masked_cols)
    add_recon(recon, 'random')
    # 4) average nearby columns
    recon = IMPUTATION.average_col(btchroma, mask, masked_cols, win=3)
    add_recon(recon, 'average 2 nearby cols')
    # 5) knn
    recon, _ = IMPUTATION.eucldist_cols(btchroma, mask, masked_cols, win=7)
    add_recon(recon, 'knn for the whole song')
    # 6) linear prediction
    recon, _ = IMPUTATION.lintransform_cols(btchroma,
                                            mask,
                                            masked_cols,
                                            win=1)
    add_recon(recon, 'linear prediction (1 col)')
    # 7) SIPLCA
    _, _, _, _, recon, _ = IMPUTATION_PLCA.SIPLCA_mask.analyze(
        (btchroma * mask).copy(), 4, mask, win=5)
    add_recon(recon, 'SIPLCA, rank=4, win=5')
    # 8) SIPLCA, second setting
    _, _, _, _, recon, _ = IMPUTATION_PLCA.SIPLCA_mask.analyze(
        (btchroma * mask).copy(), 25, mask, win=10)
    add_recon(recon, 'SIPLCA, rank=25, win=10')
    # 9) codebook
    # 'is not None': comparing an ndarray with != None is elementwise, so
    # the result cannot be used as the condition of an if
    if codebook is not None:
        # floor division keeps the reshape width an integer
        cb = [p.reshape(12, p.size // 12) for p in codebook]
        recon, _ = IMPUTATION.codebook_cols(btchroma, mask, masked_cols, cb)
        # taken from the reshaped codes so a list codebook works as well
        ncodes = len(cb)
        codelen = cb[0].shape[1] if cb else 0
        add_recon(
            recon, 'codebook, ' + str(ncodes) + ' codes of length ' +
            str(codelen))
    # ALL IMAGES CREATED
    fig = plt.figure()
    fig.subplots_adjust(hspace=0.4)
    # black bars at x=4.5 and x=5.5, on both sides of the masked column
    blackbarsfun = lambda: plt.gca().axvline(
        linewidth=2, color='0.', x=4.5) and plt.gca().axvline(
            linewidth=2, color='0.', x=5.5)
    # plotall
    plotall(allimages,
            subplot=(3, 3),
            cmap='gray_r',
            title=titles,
            xlabel=xlabels,
            axvlines=blackbarsfun,
            colorbar=False)
# Example #3
# 0
def test_maskedcol_on_dataset(datasetdir,
                              method='random',
                              ncols=1,
                              win=3,
                              rank=4,
                              codebook=None,
                              **kwargs):
    """
    General method to test a method on a whole dataset for one masked column
    Methods are:
      - random
      - randomfromsong
      - average
      - codebook
      - knn_eucl
      - knn_kl
      - lintrans
      - siplca
      - siplca2
    Used arguments vary based on the method. For SIPLCA, we can use **kwargs
    to set priors.

    INPUT
      datasetdir  - directory handed to get_all_matfiles; every returned
                    file must hold a 'btchroma' entry
      method      - one of the names above; an unknown name is reported and
                    the function returns at the first usable file
      ncols       - number of columns to mask (given to random_col_mask)
      win         - window for average / knn / lintrans / siplca methods
      rank        - rank for the siplca methods
      codebook    - for method 'codebook': list of codes, or a 2D ndarray
                    with one flattened code per row (reshaped to 12 rows)
      kwargs      - forwarded to the SIPLCA analyze call

    Files whose btchroma has fewer than 2 dimensions, fewer than MINLENGTH
    columns, or contains NaN are skipped.
    Results (number of songs, mean and std of both error measures) are
    printed, nothing is returned.
    """
    # get all matfiles
    matfiles = get_all_matfiles(datasetdir)
    # init
    total_cnt = 0
    errs_eucl = []
    errs_kl = []
    # some specific inits
    # 'is not None': comparing an ndarray with != None is elementwise and
    # cannot be used in a boolean expression
    if codebook is not None and not isinstance(codebook, list):
        # floor division keeps the reshape width an integer
        codelen = codebook.shape[1] // 12
        codebook = [p.reshape(12, codelen) for p in codebook]
        print('codebook in ndarray format transformed to list')
    # iterate
    for matfile in matfiles:
        btchroma = sio.loadmat(matfile)['btchroma']
        if len(btchroma.shape) < 2:
            continue
        if btchroma.shape[1] < MINLENGTH or np.isnan(btchroma).any():
            continue
        mask, masked_cols = MASKING.random_col_mask(btchroma,
                                                    ncols=ncols,
                                                    win=25)
        ########## ALGORITHM DEPENDENT
        if method == 'random':
            recon = IMPUTATION.random_col(btchroma, mask, masked_cols)
        elif method == 'randomfromsong':
            recon = IMPUTATION.random_col_from_song(btchroma, mask,
                                                    masked_cols)
        elif method == 'average':
            recon = IMPUTATION.average_col(btchroma,
                                           mask,
                                           masked_cols,
                                           win=win)
        elif method == 'codebook':
            recon, _ = IMPUTATION.codebook_cols(btchroma, mask, masked_cols,
                                                codebook)
        elif method == 'knn_eucl':
            recon, _ = IMPUTATION.knn_cols(btchroma,
                                           mask,
                                           masked_cols,
                                           win=win,
                                           measure='eucl')
        elif method == 'knn_kl':
            recon, _ = IMPUTATION.knn_cols(btchroma,
                                           mask,
                                           masked_cols,
                                           win=win,
                                           measure='kl')
        elif method == 'lintrans':
            recon, _ = IMPUTATION.lintransform_cols(btchroma,
                                                    mask,
                                                    masked_cols,
                                                    win=win)
        elif method == 'siplca':
            res = IMPUTATION_PLCA.SIPLCA_mask.analyze((btchroma * mask).copy(),
                                                      rank,
                                                      mask,
                                                      win=win,
                                                      convergence_thresh=1e-15,
                                                      **kwargs)
            _, _, _, _, recon, _ = res
        elif method == 'siplca2':
            res = IMPUTATION_PLCA.SIPLCA2_mask.analyze(
                (btchroma * mask).copy(),
                rank,
                mask,
                win=win,
                convergence_thresh=1e-15,
                **kwargs)
            _, _, _, _, recon, _ = res
        else:
            print('unknown method: %s' % (method, ))
            return
        ########## ALGORITHM DEPENDENT END
        # measure recon
        # messages are built with %s so that the output is the same text
        # whether print is a statement or a function
        err_eucl = recon_error(btchroma, mask, recon, measure='eucl')
        if err_eucl > 100:
            print('huge EUCL error: %s , method = %s ,file = %s' %
                  (err_eucl, method, matfile))
        errs_eucl.append(err_eucl)
        err_kl = recon_error(btchroma, mask, recon, measure='kl')
        if err_kl > 100:
            print('huge KL error: %s , method = %s ,file = %s' %
                  (err_kl, method, matfile))
        errs_kl.append(err_kl)
        total_cnt += 1
    # done
    print('number of songs tested: %s' % (total_cnt, ))
    print('average sq euclidean dist: %s ( %s )' %
          (np.mean(errs_eucl), np.std(errs_eucl)))
    print('average kl divergence: %s ( %s )' %
          (np.mean(errs_kl), np.std(errs_kl)))
def test_maskedcol_on_dataset(datasetdir,method='random',ncols=1,win=3,rank=4,codebook=None,**kwargs):
    """
    General method to test a method on a whole dataset for one masked column
    Methods are:
      - random
      - randomfromsong
      - average
      - codebook
      - knn_eucl
      - knn_kl
      - lintrans
      - siplca
      - siplca2
    Used arguments vary based on the method. For SIPLCA, we can use **kwargs
    to set priors.

    INPUT
      datasetdir  - directory handed to get_all_matfiles; every returned
                    file must hold a 'btchroma' entry
      method      - one of the names above; an unknown name is reported and
                    the function returns at the first usable file
      ncols       - number of columns to mask (given to random_col_mask)
      win         - window for average / knn / lintrans / siplca methods
      rank        - rank for the siplca methods
      codebook    - for method 'codebook': list of codes, or a 2D ndarray
                    with one flattened code per row (reshaped to 12 rows)
      kwargs      - forwarded to the SIPLCA analyze call

    Files whose btchroma has fewer than 2 dimensions, fewer than MINLENGTH
    columns, or contains NaN are skipped.
    Results (number of songs, mean and std of both error measures) are
    printed, nothing is returned.
    """
    # get all matfiles
    matfiles = get_all_matfiles(datasetdir)
    # init
    total_cnt = 0
    errs_eucl = []
    errs_kl = []
    # some specific inits
    # 'is not None': an ndarray compared with != None gives an elementwise
    # result that cannot be used in a boolean expression
    if codebook is not None and not isinstance(codebook, list):
        # floor division keeps the reshape width an integer
        codebook = [p.reshape(12,codebook.shape[1]//12) for p in codebook]
        print('codebook in ndarray format transformed to list')
    # iterate
    for matfile in matfiles:
        btchroma = sio.loadmat(matfile)['btchroma']
        if len(btchroma.shape) < 2:
            continue
        if btchroma.shape[1] < MINLENGTH or np.isnan(btchroma).any():
            continue
        mask,masked_cols = MASKING.random_col_mask(btchroma,ncols=ncols,win=25)
        ########## ALGORITHM DEPENDENT
        if method == 'random':
            recon = IMPUTATION.random_col(btchroma,mask,masked_cols)
        elif method == 'randomfromsong':
            recon = IMPUTATION.random_col_from_song(btchroma,mask,masked_cols)
        elif method == 'average':
            recon = IMPUTATION.average_col(btchroma,mask,masked_cols,win=win)
        elif method == 'codebook':
            recon,_ = IMPUTATION.codebook_cols(btchroma,mask,masked_cols,codebook)
        elif method in ('knn_eucl','knn_kl'):
            # the distance measure is the part of the name after 'knn_'
            recon,_ = IMPUTATION.knn_cols(btchroma,mask,masked_cols,win=win,
                                          measure=method[len('knn_'):])
        elif method == 'lintrans':
            recon,_ = IMPUTATION.lintransform_cols(btchroma,mask,masked_cols,win=win)
        elif method in ('siplca','siplca2'):
            # both variants take the same arguments, only the model differs
            if method == 'siplca':
                model = IMPUTATION_PLCA.SIPLCA_mask
            else:
                model = IMPUTATION_PLCA.SIPLCA2_mask
            res = model.analyze((btchroma*mask).copy(),
                                rank,mask,win=win,
                                convergence_thresh=1e-15,
                                **kwargs)
            _, _, _, _, recon, _ = res
        else:
            print('unknown method: %s' % (method,))
            return
        ########## ALGORITHM DEPENDENT END
        # measure recon, with both measures
        # messages are built with %s so that the output is the same text
        # whether print is a statement or a function
        for measure,label,errs in (('eucl','EUCL',errs_eucl),('kl','KL',errs_kl)):
            err = recon_error(btchroma,mask,recon,measure=measure)
            if err > 100:
                print('huge %s error: %s , method = %s ,file = %s' % (label,err,method,matfile))
            errs.append( err )
        total_cnt += 1
    # done
    print('number of songs tested: %s' % (total_cnt,))
    print('average sq euclidean dist: %s ( %s )' % (np.mean(errs_eucl),np.std(errs_eucl)))
    print('average kl divergence: %s ( %s )' % (np.mean(errs_kl),np.std(errs_kl)))
import numpy as np
import scipy.io as sio
from plottools import plotall
import pylab as P

sys.path.append('../PythonSrc')
import evaluation
import masking
import imputation


if __name__ == '__main__':

    # Demo script: mask one column of a beat chromagram, impute it with the
    # linear-transform method and plot original / masked / reconstruction.
    matpath = '/home/thierry/Columbia/covers80/coversongs/covers32k/Caroline_No/beach_boys+Pet_Sounds+13-Caroline_No.mp3.mat'
    btchroma145 = sio.loadmat(matpath)['btchroma']
    mask,masked_cols = masking.random_col_mask(btchroma145,ncols=1,win=30)
    recon,lt = imputation.lintransform_cols(btchroma145,mask,masked_cols,win=1)

    # 14-column display window; the first masked column lands at index 7
    pos1 = masked_cols[0] - 7
    pos2 = masked_cols[0] + 7
    im1,im2,im3 = [img[:,pos1:pos2].copy()
                   for img in (btchroma145, btchroma145 * mask, recon)]

    def blackbarsfun():
        # vertical black bars at x=6.5 and x=7.5, on both sides of the
        # masked column
        return (P.gca().axvline(linewidth=2,color='0.',x=6.5)
                and P.gca().axvline(linewidth=2,color='0.',x=7.5))

    # plot all this
    fig = P.figure()
    fig.subplots_adjust(hspace=0.4)
    plotall([im1,im2,im3],
            subplot=(3,1),
            cmap='gray_r',
            title=['original','original masked','reconstruction'],
            axvlines=blackbarsfun,
            colorbar=False,
            xticks=[()]*3)
    P.show()
def compare_all(btchroma,mask,masked_cols,codebook=None):
    """
    Compare all the algorithms we have so far.
    A lot of parameters hard-coded, but...
    It will get improved.
    We display (one subplot each, on a 3x3 grid):
      1) original beat chromagram
      2) masked beat chromagram
      3) imputation by random
      4) imputation by averaging nearby columns
      5) imputation by knn on the rest of the song
      6) imputation by linear prediction
      7) imputation by SIPLCA (rank=4, win=5)
      8) imputation by SIPLCA (rank=25, win=10)
      9) imputation by codebook, if provided

    Every reconstruction panel is labelled with its error as returned by
    EVAL.recon_error(..., measure='eucl').

    INPUT
      btchroma     - beat chromagram (2D array, indexed [:, column])
      mask         - array multiplied elementwise with btchroma to hide
                     the masked columns
      masked_cols  - indices of the masked columns; only the first one is
                     used to position the display window
      codebook     - optional, either a 2D ndarray with one flattened code
                     per row or a list of such codes; each code is reshaped
                     to 12 rows before use

    Returns nothing, the figure is created through plt / plotall.

    To init mask and masked_cols, something like:
    mask,masked_cols = masking.random_col_mask(btchroma,ncols=1,win=30)
    """
    # for displaying purposes: a 10-column window in which the first
    # masked column sits at index 5
    pos1 = masked_cols[0] - 5
    pos2 = masked_cols[0] + 5
    # original beat chroma and masked one, no error label for those
    allimages = [btchroma[:,pos1:pos2].copy(),
                 (btchroma * mask)[:,pos1:pos2].copy()]
    titles = ['original beat chroma','masked beat chroma']
    xlabels = ['','']

    def show(recon,title):
        # measure the reconstruction, crop it and queue it for plotting
        err = EVAL.recon_error(btchroma,mask,recon,measure='eucl')
        allimages.append(recon[:,pos1:pos2].copy())
        titles.append(title)
        xlabels.append('err='+str(err))

    # 3) random
    show(IMPUTATION.random_col(btchroma,mask,masked_cols),'random')
    # 4) average nearby columns
    show(IMPUTATION.average_col(btchroma,mask,masked_cols,win=3),
         'average 2 nearby cols')
    # 5) knn
    recon,_ = IMPUTATION.eucldist_cols(btchroma,mask,masked_cols,win=7)
    show(recon,'knn for the whole song')
    # 6) linear prediction
    recon,_ = IMPUTATION.lintransform_cols(btchroma,mask,masked_cols,win=1)
    show(recon,'linear prediction (1 col)')
    # 7) and 8) SIPLCA with two (rank, win) settings
    for rank,win in ((4,5),(25,10)):
        res = IMPUTATION_PLCA.SIPLCA_mask.analyze((btchroma*mask).copy(),
                                                  rank,mask,win=win)
        _, _, _, _, recon, _ = res
        show(recon,'SIPLCA, rank='+str(rank)+', win='+str(win))
    # 9) codebook
    # 'is not None': comparing an ndarray with != None is elementwise, so
    # the result cannot be used as the condition of an if
    if codebook is not None:
        # floor division keeps the reshape width an integer
        cb = [p.reshape(12,p.size//12) for p in codebook]
        recon,_ = IMPUTATION.codebook_cols(btchroma,mask,masked_cols,cb)
        # taken from the reshaped codes so a list codebook works as well
        ncodes = len(cb)
        codelen = cb[0].shape[1] if cb else 0
        show(recon,'codebook, '+str(ncodes)+' codes of length '+str(codelen))
    # ALL IMAGES CREATED
    fig = plt.figure()
    fig.subplots_adjust(hspace=0.4)
    # black bars at x=4.5 and x=5.5, on both sides of the masked column
    blackbarsfun = lambda: plt.gca().axvline(linewidth=2,color='0.',x=4.5) and plt.gca().axvline(linewidth=2,color='0.',x=5.5)
    # plotall
    plotall(allimages,subplot=(3,3),cmap='gray_r',title=titles,xlabel=xlabels,axvlines=blackbarsfun,colorbar=False)