Example #1
def lnL_sys(mock, ell=0, rebin=None, sys='fc'): 
    ''' Compare the pseudo-Gaussian likelihood with no systematics, the ICA likelihood 
    estimate with no systematics, and the ICA likelihood estimate with fiber collisions (sys='fc').
    '''
    # Likelihood without systematics 
    Pk_nosys = NG.dataX(mock, ell=ell, rebin=rebin, sys=None)
    gauss = NG.lnL_pca_gauss(Pk_nosys, Pk_nosys)
    ica_nosys = NG.lnL_ica(Pk_nosys, Pk_nosys)
    
    # Likelihood with specified systematics 
    Pk_sys = NG.dataX(mock, ell=ell, rebin=rebin, sys=sys)
    ica_sys = NG.lnL_ica(Pk_sys, Pk_sys)
    
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    nbin = 32
    sub.hist(gauss, bins=nbin, range=[-2.2*Pk_nosys.shape[0], -0.8*Pk_nosys.shape[0]], 
            density=True, alpha=0.75, label=r'Gaussian $\mathcal{L^\mathtt{pseudo}}$; no sys.')
    sub.hist(ica_nosys, bins=nbin, range=[-2.2*Pk_nosys.shape[0], -0.8*Pk_nosys.shape[0]], 
            density=True, alpha=0.75, label='ICA; no sys.')
    sub.hist(ica_sys, bins=nbin, range=[-2.2*Pk_nosys.shape[0], -0.8*Pk_nosys.shape[0]], 
            density=True, alpha=0.75, label='ICA; w/ sys.')
    sub.set_xlabel(r'log $\mathcal{L}$', fontsize=25)
    sub.set_xlim([-2.2*Pk_nosys.shape[0], -0.5*Pk_nosys.shape[0]])
    sub.legend(loc='upper left', prop={'size': 20}) 

    if rebin is None: # save fig
        f = ''.join([UT.fig_dir(), 'tests/test.lnL_sys.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.lnL_sys.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
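For reference, the pseudo-Gaussian likelihood that NG.lnL_pca_gauss evaluates can presumably be sketched as a multivariate normal with mean and covariance estimated from the mocks. A minimal sketch under that assumption; lnL_gauss_sketch is an illustrative name, not part of the NG module.

import numpy as np

def lnL_gauss_sketch(X_test, X_mocks):
    # multivariate-normal log-likelihood with mean and covariance
    # estimated from the mock spectra (rows = mocks, columns = k bins)
    mu = np.mean(X_mocks, axis=0)
    C = np.cov(X_mocks.T)
    Cinv = np.linalg.inv(C)
    _, lndetC = np.linalg.slogdet(C)
    dX = X_test - mu
    chi2 = np.einsum('ij,jk,ik->i', dX, Cinv, dX)  # per-mock (x-mu)^T C^-1 (x-mu)
    return -0.5 * (chi2 + lndetC + X_mocks.shape[1] * np.log(2. * np.pi))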
Example #2
def ica(mock, ell=0, rebin=None): 
    ''' *** TESTED *** 
    Test that the ICA works!
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin)
    X, _ = NG.meansub(Pk)
    X_w, W = NG.whiten(X) # whitened data
    X_ica, W_ica = NG.Ica(X_w) # ICA transform; keep its mixing matrix separate from W
    
    # compare the covariance of the original and the ICA-transformed data 
    C_X = np.cov(X.T)
    C_Xica = np.cov(X_ica.T) 

    prettyplot()
    fig = plt.figure(figsize=(20, 8))
    sub = fig.add_subplot(121)
    im = sub.imshow(np.log10(C_X), interpolation='none')
    sub.set_title('log(Cov.) of Data')
    fig.colorbar(im, ax=sub) 

    sub = fig.add_subplot(122)
    im = sub.imshow(C_Xica, interpolation='none')
    fig.colorbar(im, ax=sub) 
    sub.set_title('Cov. of ICA transformed Data')
    # save fig
    if rebin is None: 
        f = ''.join([UT.fig_dir(), 'tests/test.ICAcov.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.ICAcov.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None
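NG.Ica is not shown here; a plausible minimal sketch, assuming it wraps something like scikit-learn's FastICA applied to the already-whitened data, would be:

from sklearn.decomposition import FastICA

def ica_sketch(X_w):
    # FastICA on pre-whitened data (rows = mocks); whiten=False because
    # NG.whiten has already been applied upstream
    ica = FastICA(whiten=False, max_iter=1000)
    X_ica = ica.fit_transform(X_w)
    return X_ica, ica.components_

If the ICA step only rotates the whitened components, cov(X_ica.T) should stay close to the identity, which is what the right-hand panel visualizes.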
Example #3
def lnL_pca_kde(mock, ell=0, rebin=None, krange=None): 
    ''' ***TESTED: as expected, more discrepant for small numbers of 
    mock catalogs. For the Nseries monopole with 1000 mocks, there is no 
    significant discrepancy in the likelihood distribution.*** 
    Test whether the Gaussian KDE approximation of the pdfs 
    is sufficiently accurate by comparing the likelihoods estimated 
    by NG.lnL_pca and NG.lnL_pca_gauss. If they are highly 
    discrepant, the KDE estimates of the pdfs are not accurate. 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    pca_gauss = NG.lnL_pca_gauss(Pk, Pk)
    pca_kde = NG.lnL_pca(Pk, Pk) 

    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    nbin = 32 
    sub.hist(pca_gauss, bins=nbin, range=[-2.2*Pk.shape[1], -0.8*Pk.shape[1]], 
            density=True, alpha=0.75, label=r'Gaussian $\mathcal{L^\mathtt{pseudo}}$')
    sub.hist(pca_kde, bins=nbin, range=[-2.2*Pk.shape[1], -0.8*Pk.shape[1]], 
            density=True, alpha=0.75, label=r'$\mathcal{L^\mathtt{pseudo}}$ KDE estimate')
    sub.set_xlabel(r'log $\mathcal{L}$', fontsize=25)
    sub.set_xlim([-2.2*Pk.shape[1], -0.5*Pk.shape[1]])
    sub.legend(loc='upper left', prop={'size': 20}) 

    if rebin is None: # save fig
        f = ''.join([UT.fig_dir(), 'tests/test.lnL_kde_test.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.lnL_kde_test.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
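NG.lnL_pca presumably replaces the Gaussian with per-component KDE density estimates after PCA whitening. A minimal sketch under that assumption (lnL_kde_sketch is an illustrative name):

import numpy as np
from scipy.stats import gaussian_kde

def lnL_kde_sketch(X_pca):
    # treat the PCA-whitened components as independent and sum the logs of
    # 1D Gaussian-KDE density estimates (Scott's-rule bandwidth by default)
    lnL = np.zeros(X_pca.shape[0])
    for i in range(X_pca.shape[1]):
        kern = gaussian_kde(X_pca[:, i])
        lnL += np.log(kern.evaluate(X_pca[:, i]))
    return lnL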
Example #4
def lnL(mock, ell=0, rebin=None, krange=None): 
    ''' Test the ICA likelihood estimate against the pseudo-Gaussian likelihood. 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    ica = NG.lnL_ica(Pk, Pk) 
    gauss = NG.lnL_pca_gauss(Pk, Pk)
    
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    nbin = 32
    sub.hist(gauss, bins=nbin, range=[-2.2*Pk.shape[1], -0.8*Pk.shape[1]], 
            density=True, alpha=0.75, label=r'Gaussian $\mathcal{L^\mathtt{pseudo}}$')
    sub.hist(ica, bins=nbin, range=[-2.2*Pk.shape[1], -0.8*Pk.shape[1]], 
            density=True, alpha=0.75, label='ICA')
    sub.set_xlabel(r'log $\mathcal{L}$', fontsize=25)
    sub.set_xlim([-2.2*Pk.shape[1], -0.5*Pk.shape[1]])
    sub.legend(loc='upper left', prop={'size': 20}) 

    str_rebin = ''
    if rebin is not None: 
        str_rebin = '.rebin'+str(rebin)
    str_krange = ''
    if krange is not None: 
        str_krange = '.kmin'+str(krange[0])+'.kmax'+str(krange[1])
    
    f = ''.join([UT.fig_dir(), 'tests/test.lnL.', mock, '.ell', str(ell), 
        str_rebin, str_krange, '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
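Illustrative usage; the mock-catalog name 'nseries' and the k-range below are assumptions, not values taken from the source:

lnL('nseries', ell=0, krange=(0.01, 0.15))   # monopole, restricted k-range
lnL('nseries', ell=2, rebin=5)               # quadrupole, rebinned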
Example #5
def whiten(mock, ell=0, rebin=None, krange=None, method='choletsky'): 
    ''' ***TESTED: Cholesky decomposition fails for the fully binned Nseries
    P(k) because the precision-matrix estimate is not positive definite***
    Test the data whitening. 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    X_w, W = NG.whiten(X, method=method) # whitened data
    
    prettyplot()
    fig = plt.figure(figsize=(15,7))
    sub = fig.add_subplot(121)
    for i in range(X.shape[1]): 
        sub.plot(range(X_w.shape[0]), X_w[:,i])
    
    sub.set_xlim([0, X.shape[0]]) 
    sub.set_xlabel(r'$\mathtt{k}$ bins', fontsize=25)
    sub.set_ylim([-7., 7.])
    sub.set_ylabel(r'$\mathtt{W^{T} (P^i_'+str(ell)+r'- \overline{P_'+str(ell)+r'})}$', fontsize=25)
    
    C_Xw = np.cov(X_w.T)
    sub = fig.add_subplot(122)
    im = sub.imshow(C_Xw, interpolation='none')
    fig.colorbar(im, ax=sub) 
    
    if rebin is None: 
        f = ''.join([UT.fig_dir(), 'tests/test.whiten.', method, '.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.whiten.', method, '.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
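For the 'choletsky' method, the whitening matrix can be sketched from the Cholesky factor of the estimated precision matrix: with C^{-1} = L L^T, the transform X_w = X L gives cov(X_w) = L^T C L = I. A minimal sketch, assuming the mean-subtracted spectra sit in the rows of X (swap axes if the convention is transposed):

import numpy as np

def whiten_cholesky_sketch(X):
    C = np.cov(X.T)                           # covariance estimate from the mocks
    L = np.linalg.cholesky(np.linalg.inv(C))  # C^-1 = L L^T
    return np.dot(X, L), L                    # cov(X L) = L^T C L = I

np.linalg.cholesky raises LinAlgError when the precision-matrix estimate is not positive definite, which is exactly the failure mode the docstring reports for the fully binned Nseries P(k).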
Example #6
def whiten_recon(mock, ell=0, rebin=None, krange=None, method='choletsky'): 
    ''' ***TESTED: the whitening matrix reconstructs the P(k)s*** 
    Test whether P(k) can be reconstructed using the whitening matrix. 
    '''
    Pk, k = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange, k_arr=True)
    X, mu_X = NG.meansub(Pk)
    X_w, W = NG.whiten(X, method=method) # whitened data
    
    prettyplot()
    fig = plt.figure(figsize=(15,7))
    sub = fig.add_subplot(121)
    for i in range(X.shape[0]): 
        sub.plot(k, Pk[i,:])
    if krange is None: 
        sub.set_xlim([1e-3, 0.5])
    else: 
        sub.set_xlim(krange)
    sub.set_xscale('log')
    sub.set_xlabel(r'$\mathtt{k}$', fontsize=25)
    sub.set_yscale('log') 
    sub.set_ylim([2e3, 2.5e5])
    
    np.random.seed(7)
    sub = fig.add_subplot(122)
    for i in range(X.shape[0]): 
        X_noise = np.random.normal(size=X_w.shape[1])
        X_rec = np.linalg.solve(W.T, X_noise.T)
        sub.plot(k, X_rec.T + mu_X)
    if krange is None: 
        sub.set_xlim([1e-3, 0.5])
    else: 
        sub.set_xlim(krange)
    sub.set_xscale('log')
    sub.set_xlabel(r'$\mathtt{k}$', fontsize=25)
    sub.set_yscale('log') 
    sub.set_ylim([2e3, 2.5e5])

    if rebin is None: 
        f = ''.join([UT.fig_dir(), 'tests/test.whiten_recon.', method, '.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.whiten_recon.', method, '.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
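The reconstruction step works because, under the convention X_w = X W, we have X_w^T = W^T X^T, so np.linalg.solve(W.T, X_w.T) recovers X^T exactly. A minimal round-trip check on toy data (all names here are illustrative):

import numpy as np

rng = np.random.RandomState(7)
X_toy = rng.normal(size=(100, 5))             # 100 'mocks', 5 'k bins'
C = np.cov(X_toy.T)
W_toy = np.linalg.cholesky(np.linalg.inv(C))  # Cholesky whitening matrix
X_w_toy = np.dot(X_toy, W_toy)
assert np.allclose(np.linalg.solve(W_toy.T, X_w_toy.T).T, X_toy)

Drawing X_noise from N(0, I) in place of X_w therefore generates new spectra with the same mean and covariance as the mocks, which is what the right-hand panel shows.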
Example #7
def p_Xw_i_MISE(mock, ell=0, rebin=None, krange=None, method='choletsky', b=0.1):
    ''' Examine the pdfs of the X_w^i components that deviate most 
    significantly from N(0,1), as ranked by MISE. 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    X_w, W = NG.whiten(X, method=method) # whitened data
    
    # calculate the MISE of each p(X_w^i) relative to N(0,1) 
    x = np.arange(-5., 5.1, 0.1)
    mise = np.zeros(X_w.shape[1])
    for i_bin in range(X_w.shape[1]): 
        mise[i_bin] = NG.MISE(X_w[:,i_bin], b=b) 

    # plot the most discrepant components. 
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    i_sort = np.argsort(mise)
    print('outlier bins = ', i_sort[-5:])
    print('mise = ', mise[i_sort[-5:]])

    nbin = int(10./b)
    for i_bin in i_sort[-10:]: 
        hb_Xi, Xi_edges = np.histogram(X_w[:,i_bin], bins=nbin, range=[-5., 5.], density=True) 
        p_X_w_arr = UT.bar_plot(Xi_edges, hb_Xi)
        sub.plot(p_X_w_arr[0], p_X_w_arr[1])

    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel(r'$\mathtt{X^{i}_{W}}$', fontsize=25) 
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X^{i}_{W})}$', fontsize=25) 
    sub.legend(loc='upper right') 
    
    str_rebin = ''
    if rebin is not None: 
        str_rebin = '.rebin'+str(rebin)

    f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i_outlier.', method, '.', mock, '.ell', str(ell), 
        str_rebin, '.b', str(b), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None
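NG.MISE is not shown here; one plausible reading, given the bin width b and the [-5, 5] histogram range used above, is the integrated squared error between a histogram density estimate and the N(0,1) reference pdf. The actual estimator may differ:

import numpy as np

def mise_sketch(x_i, b=0.1):
    nbin = int(10. / b)
    h, edges = np.histogram(x_i, bins=nbin, range=[-5., 5.], density=True)
    mids = 0.5 * (edges[1:] + edges[:-1])
    ref = np.exp(-0.5 * mids**2) / np.sqrt(2. * np.pi)  # N(0,1) pdf at bin centers
    return np.sum((h - ref)**2) * b                     # Riemann sum over the bins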
Example #8
def p_Xw_i_outlier(mock, ell=0, rebin=None, krange=None, method='choletsky'):
    ''' Examine the pdf of X_w^i components that deviate significantly from  
    N(0,1) 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    X_w, W = NG.whiten(X, method=method) # whitened data
    
    # quantify each p(X_w^i)'s deviation from N(0,1) (mean squared difference on a grid) 
    x = np.arange(-5., 5.1, 0.1)
    chi2 = np.zeros(X_w.shape[1])
    for i_bin in range(X_w.shape[1]): 
        kern = gkde(X_w[:,i_bin]) # Gaussian KDE with Scott's rule-of-thumb bandwidth 
        chi2[i_bin] = np.sum((UT.gauss(x, 1., 0.) - kern.evaluate(x))**2)/float(len(x))
    
    # plot the most discrepant components. 
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    i_sort = np.argsort(chi2)
    print('outlier bins = ', i_sort[-5:])
    for i_bin in i_sort[-10:]: 
        kern = gkde(X_w[:,i_bin]) # Gaussian KDE with Scott's rule-of-thumb bandwidth 
        sub.plot(x, kern.evaluate(x))
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel(r'$\mathtt{X^{i}_{W}}$', fontsize=25) 
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X^{i}_{W})}$', fontsize=25) 
    sub.legend(loc='upper right') 
    
    if rebin is None: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i_outlier.', method, '.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i_outlier.', method, '.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None
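UT.gauss is used throughout as the N(0,1) reference curve; its (x, sig, mu) argument order is inferred from the UT.gauss(x, 1., 0.) calls above. A sketch consistent with that usage:

import numpy as np

def gauss_sketch(x, sig, mu):
    # unit-normalized Gaussian pdf; signature inferred, not taken from UT
    return np.exp(-0.5 * ((x - mu) / sig)**2) / (sig * np.sqrt(2. * np.pi))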
Example #9
def dataX(mock, ell=0, rebin=None, krange=None): 
    ''' ***TESTED***
    Test the data X calculation 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)

    for i in range(X.shape[0]): 
        sub.plot(range(X.shape[1]), X[i,:])
    
    sub.set_xlim([0, X.shape[1]]) 
    sub.set_xlabel(r'$\mathtt{k}$ bins', fontsize=25)
    sub.set_ylim([-1e5, 1e5])
    sub.set_ylabel(r'$\mathtt{P^i_'+str(ell)+r'(k) - \overline{P_'+str(ell)+r'(k)}}$', fontsize=25)
    if rebin is not None: 
        f = ''.join([UT.fig_dir(), 'tests/test.dataX.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.dataX.', mock, '.ell', str(ell), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
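NG.meansub, as used in these tests, appears to subtract the mean spectrum and return it alongside the residuals. A minimal sketch under that assumption (axis 0 assumes rows index the mocks):

import numpy as np

def meansub_sketch(Pk):
    mu = np.mean(Pk, axis=0)   # mean P(k) over the mocks
    return Pk - mu, mu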
Example #10
def p_Xw_i(mock, ell=0, rebin=None, krange=None, ica=False, pca=False): 
    ''' Test the probability distribution function of each X_w^i 
    component, p(X_w^i). First compare the histograms of p(X_w^i) 
    with N(0,1); then compare the Gaussian KDE estimates of p(X_w^i).
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    str_w = 'W'
    if ica and pca: 
        raise ValueError("ica and pca cannot both be True")
    if ica: # ICA components
        # ICA components do not need to be Gaussian.
        # in fact the whole point of the ICA transform
        # is to capture the non-Gaussianity...
        X_white, _ = NG.whiten(X) # whitened data
        X_w, _ = NG.Ica(X_white) 
        str_w = 'ICA'
    if pca: # PCA components
        X_w, _ = NG.whiten(X, method='pca') # whitened data
        str_w = 'PCA'
    if not ica and not pca: # just whitened 
        X_w, W = NG.whiten(X) # whitened data
    
    # p(X_w^i) histograms
    fig = plt.figure(figsize=(15,7))
    sub = fig.add_subplot(121)
    for i_bin in range(X_w.shape[1]): 
        p_X_w, edges = np.histogram(X_w[:,i_bin], density=True)
        p_X_w_arr = UT.bar_plot(edges, p_X_w)
        sub.plot(p_X_w_arr[0], p_X_w_arr[1])
    x = np.arange(-5., 5.1, 0.1)
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel(r'$\mathtt{X_{'+str_w+r'}}$', fontsize=25) 
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X_{'+str_w+r'})}$', fontsize=25) 
    sub.legend(loc='upper right') 

    # p(X_w^i) gaussian KDE fits  
    pdfs = NG.p_Xw_i(X_w, range(X_w.shape[1]), x=x)

    sub = fig.add_subplot(122)
    for i_bin in range(X_w.shape[1]): 
        sub.plot(x, pdfs[i_bin])
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel(r'$\mathtt{X_{'+str_w+r'}}$', fontsize=25) 
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X_{'+str_w+r'})}$', fontsize=25) 
    sub.legend(loc='upper right') 

    str_ica, str_pca = '', ''
    if ica: 
        str_ica = '.ICA'
    if pca: 
        str_pca = '.PCA'

    if rebin is None: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i', str_pca, str_ica, '.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i', str_pca, str_ica, '.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
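UT.bar_plot converts np.histogram output into arrays that plt.plot can trace as a step outline. A sketch consistent with how it is called here (edges has one more entry than values):

import numpy as np

def bar_plot_sketch(edges, values):
    # duplicate each bin value at both of its edges -> step-shaped curve
    xx = np.repeat(edges, 2)[1:-1]
    yy = np.repeat(values, 2)
    return xx, yy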
Example #11
def p_Xwi_Xwj_outlier(mock, ell=0, rebin=None, krange=None, ica=False, pca=False): 
    ''' Compare the joint pdfs of the whitened X components, p(X_w^i, X_w^j),
    to the product p(X_w^i) p(X_w^j) in order to test the independence 
    assumption. 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    if ica and pca: 
        raise ValueError("ica and pca cannot both be True")
    if ica: # ICA components
        X_white, _ = NG.whiten(X) # whitened data
        X_w, _ = NG.Ica(X_white) 
    if pca: # PCA components
        X_w, _ = NG.whiten(X, method='pca') # whitened data
    if not ica and not pca: # just whitened 
        X_w, _ = NG.whiten(X, method='choletsky') # whitened data
    
    x, y = np.linspace(-5., 5., 50), np.linspace(-5., 5., 50)
    xx, yy = np.meshgrid(x,y)
    pos = np.vstack([xx.ravel(), yy.ravel()])
    
    ij_i, ij_j = np.meshgrid(range(X_w.shape[1]), range(X_w.shape[1]))
    ij = np.vstack([ij_i.ravel(), ij_j.ravel()])

    # joint pdfs of X_w^i and X_w^j estimated from mocks  
    # i.e. p(X_w^i, X_w^j)
    pdfs_2d = NG.p_Xwi_Xwj(X_w, ij, x=x, y=y)

    # p(X_w^i) * p(X_w^j) estimated from mocks
    pXwi = NG.p_Xw_i(X_w, range(X_w.shape[1]), x=x)
    pXwj = pXwi 

    # calculate the L2-norm difference between the joint pdf and the product of the marginals 
    chi2 = np.zeros(len(pdfs_2d))
    for i in range(len(pdfs_2d)): 
        if not isinstance(pdfs_2d[i], float): 
            pXwipXwj = np.dot(pXwi[ij[0,i]][:,None], pXwj[ij[1,i]][None,:]).T.flatten()
            chi2[i] = np.sum((pXwipXwj - pdfs_2d[i])**2)
    
    # ij values with the highest chi-squared
    ii_out = np.argsort(chi2)[-10:]
    inc = np.where(ij[0,ii_out] > ij[1,ii_out]) 

    prettyplot()
    fig = plt.figure(figsize=(len(inc[0])*10, 8))
    for ii, i_sort_i in enumerate(ii_out[inc]): 
        sub = fig.add_subplot(1, len(inc[0]), ii+1)
        # plot p(X_w^i) * p(X_w^j) 
        pXwipXwj = np.dot(pXwi[ij[0,i_sort_i]][:,None], pXwj[ij[1,i_sort_i]][None,:]).T
        sub.contourf(xx, yy, pXwipXwj, cmap='gray_r', levels=[0.05, 0.1, 0.15, 0.2])
    
        # p(X_w^i, X_w^j) 
        Z = np.reshape(pdfs_2d[i_sort_i], xx.shape)
        cs = sub.contour(xx, yy, Z, colors='k', linestyles='dashed', levels=[0.05, 0.1, 0.15, 0.2])
        cs.collections[0].set_label(r'$\mathtt{p(X_w^i, X_w^j)}$') 

        sub.set_xlim([-3., 3.])
        sub.set_xlabel(r'$\mathtt{X_w^{i='+str(ij[0,i_sort_i])+r'}}$', fontsize=25)
        sub.set_ylim([-3., 3.])
        sub.set_ylabel(r'$\mathtt{X_w^{j='+str(ij[1,i_sort_i])+r'}}$', fontsize=25)
        if ii == 0: 
            sub.legend(loc='upper right', prop={'size':25})
        else: 
            sub.set_yticklabels([])
    
    str_ica, str_pca = '', ''
    if ica: 
        str_ica = '.ICA'
    if pca: 
        str_pca = '.PCA'
    if rebin is None: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xwi_Xwj_outlier', str_ica, str_pca, '.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xwi_Xwj_outlier', str_ica, str_pca, '.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None
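NG.p_Xwi_Xwj presumably estimates each joint pdf with a 2D Gaussian KDE evaluated on the same (x, y) grid built above. A minimal sketch for a single (i, j) pair, under that assumption:

import numpy as np
from scipy.stats import gaussian_kde

def p_Xwi_Xwj_sketch(X_w, i, j, x, y):
    kern = gaussian_kde(np.vstack([X_w[:, i], X_w[:, j]]))  # 2D KDE over the pair
    xx, yy = np.meshgrid(x, y)
    return kern.evaluate(np.vstack([xx.ravel(), yy.ravel()]))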