def X_gmf_all():
    ''' ***TESTED -- Nov 8, 2017***
    Sanity check of NG.X_gmf_all: assert the shape of the returned array,
    then plot 1000 randomly drawn rows together with the column-wise
    average and save the figure to UT.fig_dir() + 'tests/X_gmf_all.png'.
    '''
    gmf_arr, n_grid = NG.X_gmf_all(n_arr=True)
    # midpoints of consecutive entries of the returned N array
    n_centers = 0.5 * (n_grid[1:] + n_grid[:-1])

    assert gmf_arr.shape[1] == len(n_centers)
    assert gmf_arr.shape[0] == 20000

    figure = plt.figure(figsize=(5, 5))
    panel = figure.add_subplot(111)

    # thin black lines for a random subset of the rows
    picked = np.random.choice(range(gmf_arr.shape[0]), 1000, replace=False)
    for irow in picked:
        panel.plot(n_centers, gmf_arr[irow, :], c='k', lw=0.01)
    # average over all rows on top
    panel.plot(n_centers, np.average(gmf_arr, axis=0), c='r', lw=2, ls='--')

    # x-axis
    panel.set_xlim([0., 180.])
    panel.set_xlabel('$N$', fontsize=20)
    # y-axis
    panel.set_yscale('log')
    panel.set_ylabel(r'$\zeta(N)$', fontsize=20)

    out_path = ''.join([UT.fig_dir(), 'tests/X_gmf_all.png'])
    figure.savefig(out_path, bbox_inches='tight')
    plt.close()
    return None
def GMF_p_Xw_i(ica=False, pca=False):
    ''' Test the probability distribution function of each transformed
    X component -- p(X^i). For every component the normalized histogram
    of p(X_w^i) is compared with N(0,1) and with the density estimates
    returned by NG.p_Xw_i for the methods 'gkde', 'sk_kde', 'sm_kde'
    and 'gmm'. The figure is saved under UT.fig_dir() + 'tests/'.

    Parameters
    ----------
    ica : bool
        If True, whiten the mean-subtracted data with NG.whiten and then
        transform it with NG.Ica.
    pca : bool
        If True, use NG.whiten(..., method='pca').
        If neither flag is set, the default NG.whiten output is used.

    Raises
    ------
    ValueError
        If both `ica` and `pca` are True.
    '''
    if ica and pca:
        # mutually exclusive; fail before loading any data
        raise ValueError('ica and pca cannot both be True')

    gmf = NG.X_gmf_all()  # import all the GMF mocks
    X, _ = NG.meansub(gmf)

    if ica:  # ICA components
        # ICA components do not need to be Gaussian.
        # in fact the whole point of the ICA transform
        # is to capture the non-Gaussianity...
        X_white, _ = NG.whiten(X)  # whitened data
        X_w, _ = NG.Ica(X_white)
        str_w = 'ICA'
    elif pca:  # PCA components
        X_w, _ = NG.whiten(X, method='pca')  # whitened data
        str_w = 'PCA'
    else:  # just whitened
        X_w, _ = NG.whiten(X)  # whitened data
        str_w = 'W'

    # evaluation grid shared by all panels (loop invariant)
    x = np.linspace(-5., 5., 100)
    # (NG.p_Xw_i method, legend label, timing message) for the timed KDE fits
    kde_fits = [
        ('gkde', 'Gaussian KDE', 'scipy Gaussian KDE '),
        ('sk_kde', 'SKlearn KDE', 'SKlearn CV best-fit KDE '),
        ('sm_kde', 'StatsModels KDE', 'Stats Models KDE '),
    ]

    n_comp = gmf.shape[1]
    # p(X_w^i) histograms
    fig = plt.figure(figsize=(5 * n_comp, 4))
    for icomp in range(n_comp):
        sub = fig.add_subplot(1, n_comp, icomp + 1)
        # histogram of X_w^i s
        # `density=True` replaces the deprecated `normed=True`; the two are
        # equivalent for the equal-width bins used here.
        hh = np.histogram(X_w[:, icomp], density=True, bins=50, range=[-5., 5.])
        p_X_w_arr = UT.bar_plot(*hh)
        sub.fill_between(p_X_w_arr[0], np.zeros(len(p_X_w_arr[1])), p_X_w_arr[1],
                         color='k', alpha=0.25)
        sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=2, ls=':',
                 label=r'$\mathcal{N}(0,1)$')

        # p(X_w^i) KDE fits, each one timed
        for method, label, timing_msg in kde_fits:
            t_start = time.time()
            pdf = NG.p_Xw_i(X_w, icomp, x=x, method=method)
            sub.plot(x, pdf, lw=2.5, label=label)
            # single-argument print() behaves the same on Python 2 and 3
            print('%s %s' % (timing_msg, time.time() - t_start))

        # p(X_w^i) GMM fits
        pdf = NG.p_Xw_i(X_w, icomp, x=x, method='gmm', n_comp_max=20)
        sub.plot(x, pdf, lw=2.5, ls='--', label='GMM')

        sub.set_xlim([-3., 3.])
        sub.set_xlabel('$X_{' + str_w + '}^{(' + str(icomp) + ')}$', fontsize=25)
        sub.set_ylim([0., 0.6])
        if icomp == 0:
            sub.set_ylabel('$P(X_{' + str_w + '})$', fontsize=25)
            sub.legend(loc='upper left', prop={'size': 15})

    # file name suffix; at most one of the flags is set at this point
    str_tag = '.ICA' if ica else ('.PCA' if pca else '')
    f = ''.join([UT.fig_dir(), 'tests/test.GMF_p_Xw_i', str_tag, '.png'])
    fig.savefig(f, bbox_inches='tight')
    return None
def divGMF(div_func='kl', Nref=1000, K=5, n_mc=10, n_comp_max=10, n_mocks=2000):
    ''' compare the divergence estimates between
    D( gauss(C_gmf) || gauss(C_gmf) ),
    D( gmfs || gauss(C_gmf) ),
    D( gmfs || p(gmfs) KDE),
    D( gmfs || p(gmfs) GMM),
    D( gmfs || PI p(gmfs^i_ICA) KDE),
    and D( gmfs || PI p(gmfs^i_ICA) GMM)
    and save a histogram of the n_mc estimates of each under
    UT.fig_dir() + 'tests/'.

    Parameters
    ----------
    div_func : str
        'kl', or a string containing 'renyi' whose text after the last ':'
        parses as the float alpha.
    Nref : int or float
        Forwarded as `Nref` to NG.kNNdiv_gauss / NG.kNNdiv_Kernel
        (floats are cast to int).
    K : int
        Forwarded as `Knn` to the divergence estimators.
    n_mc : int
        Number of Monte Carlo repetitions of every estimate.
    n_comp_max : int
        Largest number of GMM components tried; the fit with the lowest BIC
        is kept.
    n_mocks : int
        Number of leading rows of NG.X_gmf_all() that are used.

    Raises
    ------
    ValueError
        If `div_func` is not supported (raised before any fitting is done).
    '''
    # Resolve the divergence labels first: an unsupported div_func previously
    # surfaced only as a NameError on `str_div` after all the fits had run.
    if 'renyi' in div_func:
        alpha = float(div_func.split(':')[-1])
        str_div = 'renyi' + str(alpha)
        xlabel = r'Renyi-$\alpha=' + str(alpha) + '$ divergence'
    elif div_func == 'kl':
        str_div = 'kl'
        xlabel = r'KL divergence'
    else:
        raise ValueError("div_func must be 'kl' or 'renyi:<alpha>'; got %r" % (div_func,))

    if isinstance(Nref, float):
        Nref = int(Nref)

    def _best_gmm(data):
        # fit GMMs with 1..n_comp_max components and keep the lowest-BIC one
        gmms, bics = [], []
        for i_comp in range(1, n_comp_max + 1):
            gmm = GMix(n_components=i_comp)
            gmm.fit(data)
            gmms.append(gmm)
            bics.append(gmm.bic(data))
        return gmms[np.array(bics).argmin()]

    def _cv_kde(data):
        # KDE with bandwidth picked by 10-fold cross-validation; prints the fit time
        t0 = time.time()
        grid = GridSearchCV(skKDE(), {'bandwidth': np.linspace(0.1, 1.0, 30)}, cv=10)
        grid.fit(data)
        print('%f sec' % (time.time() - t0))
        return grid.best_estimator_

    # read in mock GMFs and keep the first n_mocks of them
    gmfs_mock = NG.X_gmf_all()[:n_mocks]
    n_mock = gmfs_mock.shape[0]  # number of mocks
    print("%i mocks" % n_mock)
    gmfs_mock_meansub, _ = NG.meansub(gmfs_mock)  # mean subtract
    X_w, _ = NG.whiten(gmfs_mock_meansub)
    X_ica, _ = NG.Ica(X_w)  # ICA transformation
    C_gmf = np.cov(X_w.T)  # covariance matrix

    n_ica = X_ica.shape[1]
    kern_gmm = _best_gmm(X_w)  # p(gmfs) GMM
    kern_kde = _cv_kde(X_w)  # p(gmfs) KDE
    # PI p(gmfs^i_ICA) GMM -- one 1D fit per ICA component
    kern_gmm_ica = [_best_gmm(X_ica[:, ibin][:, None]) for ibin in range(n_ica)]
    # PI p(gmfs^i_ICA) KDE -- one 1D fit per ICA component
    kern_kde_ica = [_cv_kde(X_ica[:, ibin][:, None]) for ibin in range(n_ica)]

    # calculate the divergences now
    div_gauss_ref, div_gauss = [], []
    div_gmm, div_gmm_ica = [], []
    div_kde, div_kde_ica = [], []
    for i in range(n_mc):
        print('%i montecarlo' % i)
        t_start = time.time()
        # reference divergence in order to showcase the estimator's scatter
        # Gaussian distribution described by C_gmf with same n_mock mocks
        gauss = mvn(np.zeros(gmfs_mock.shape[1]), C_gmf, size=n_mock)
        div_gauss_ref.append(
            NG.kNNdiv_gauss(gauss, C_gmf, Knn=K, div_func=div_func, Nref=Nref))
        # estimate divergence between gmfs_white and a
        # Gaussian distribution described by C_gmf
        div_gauss.append(
            NG.kNNdiv_gauss(X_w, C_gmf, Knn=K, div_func=div_func, Nref=Nref))
        # D( gmfs || p(gmfs) GMM)
        div_gmm.append(
            NG.kNNdiv_Kernel(X_w, kern_gmm, Knn=K, div_func=div_func,
                             Nref=Nref, compwise=False))
        # D( gmfs || p(gmfs) KDE)
        div_kde.append(
            NG.kNNdiv_Kernel(X_w, kern_kde, Knn=K, div_func=div_func,
                             Nref=Nref, compwise=False))
        # D( gmfs || PI p(gmfs^i_ICA) GMM),
        div_gmm_ica.append(
            NG.kNNdiv_Kernel(X_ica, kern_gmm_ica, Knn=K, div_func=div_func,
                             Nref=Nref, compwise=True))
        # D( gmfs || PI p(gmfs^i_ICA) KDE),
        div_kde_ica.append(
            NG.kNNdiv_Kernel(X_ica, kern_kde_ica, Knn=K, div_func=div_func,
                             Nref=Nref, compwise=True))
        print('t= %f sec' % round(time.time() - t_start, 2))

    fig = plt.figure(figsize=(10, 5))
    sub = fig.add_subplot(111)
    hrange = [-0.15, 0.6]
    nbins = 50

    divs = [div_gauss_ref, div_gauss, div_gmm, div_kde, div_gmm_ica, div_kde_ica]
    labels = [
        'Ref.',
        r'$D(\{\zeta_i^{(m)}\}\parallel \mathcal{N}({\bf C}^{(m)}))$',
        r'$D(\{\zeta^{(m)}\}\parallel p_\mathrm{GMM}(\{\zeta^{m}\}))$',
        r'$D(\{\zeta^{(m)}\}\parallel p_\mathrm{KDE}(\{\zeta^{m}\}))$',
        r'$D(\{\zeta_\mathrm{ICA}^{(m)}\}\parallel \prod_{i} p^\mathrm{GMM}(\{\zeta_{i, \mathrm{ICA}}^{m}\}))$',
        r'$D(\{\zeta_\mathrm{ICA}^{(m)}\}\parallel \prod_{i} p^\mathrm{KDE}(\{\zeta_{i, \mathrm{ICA}}^{m}\}))$',
    ]
    y_max = 0.
    for div, lbl in zip(divs, labels):
        # `density=True` replaces the deprecated `normed=True` (same result
        # for equal-width bins)
        hh = np.histogram(np.array(div), density=True, range=hrange, bins=nbins)
        bp = UT.bar_plot(*hh)
        sub.fill_between(bp[0], np.zeros(len(bp[0])), bp[1],
                         edgecolor='none', alpha=0.5, label=lbl)
        y_max = max(y_max, bp[1].max())
        div_avg = np.average(div)
        if (div_avg < hrange[0]) or (div_avg > hrange[1]):
            print('divergence of %s (%f) is outside range' % (lbl, div_avg))
    sub.set_xlim(hrange)
    sub.set_ylim([0., y_max * 1.2])
    sub.legend(loc='upper left', prop={'size': 15})
    sub.set_xlabel(xlabel, fontsize=20)

    f_fig = ''.join([UT.fig_dir(), 'tests/kNN_divergence.gmf.K', str(K), '.',
                     str(n_mocks), '.', str_div, '.png'])
    fig.savefig(f_fig, bbox_inches='tight')
    return None
def diverge(obvs, diver, div_func='kl', Nref=1000, K=5, n_mc=10, n_comp_max=10,
            n_mocks=20000, pk_mock='patchy.z1', NorS='ngc', njobs=1):
    ''' calculate the divergences:

    - D( gauss(C_X) || gauss(C_X) )
    - D( mock X || gauss(C_X))
    - D( mock X || p(X) KDE)
    - D( mock X || p(X) GMM)
    - D( mock X || PI p(X^i_ICA) KDE)
    - D( mock X || PI p(X^i_ICA) GMM)

    Each of the n_mc estimates is printed and appended as one line to a
    '.dat' file under UT.dat_dir() + 'diverg/' whose name encodes obvs,
    diver, K, Nref and div_func.

    Parameters
    ----------
    obvs : str
        'pk' (mocks read with NG.X_pk_all) or 'gmf' (mocks read with
        NG.X_gmf_all).
    diver : str
        Which divergence to estimate; must be one of the names listed in
        the body.
    div_func : str
        'kl', or a string containing 'renyi' whose text after the last ':'
        parses as the float alpha.
    Nref : int or float
        Forwarded as `Nref` to the NG.kNNdiv_* estimators (floats are cast
        to int).
    K : int
        Forwarded as `Knn` to the estimators.
    n_mc : int
        Number of Monte Carlo repetitions.
    n_comp_max : int
        Largest number of GMM components tried (lowest BIC is kept).
    n_mocks : int or None
        For obvs='gmf', number of leading mocks used; None keeps all.
    pk_mock, NorS : str
        For obvs='pk', forwarded to NG.X_pk_all.
    njobs : int
        Forwarded to GridSearchCV (`n_jobs`) and the NG.kNNdiv_* estimators.

    Raises
    ------
    ValueError
        If `diver`, `obvs` or `div_func` is not supported. All three are
        checked before any file is touched or any data is read.
    '''
    if isinstance(Nref, float):
        Nref = int(Nref)
    if diver not in [
            'ref', 'pX_gauss', 'pX_gauss_hartlap',
            'pX_GMM', 'pX_GMM_ref',
            'pX_KDE', 'pX_KDE_ref',
            'pX_scottKDE', 'pX_scottKDE_ref',
            'pXi_ICA_GMM', 'pXi_ICA_GMM_ref',
            'pXi_parICA_GMM', 'pXi_parICA_GMM_ref',
            'pXi_ICA_KDE', 'pXi_ICA_KDE_ref',
            'pXi_parICA_KDE', 'pXi_parICA_KDE_ref',
            'pXi_ICA_scottKDE', 'pXi_ICA_scottKDE_ref',
            'pXi_parICA_scottKDE', 'pXi_parICA_scottKDE_ref'
    ]:
        raise ValueError('unsupported diver: %r' % (diver,))
    # checked here so that a bad `obvs` does not leave an empty output file behind
    if obvs not in ['pk', 'gmf']:
        raise ValueError("obvs = 'pk' or 'gmf'")
    # an unsupported div_func previously surfaced as a NameError on `str_div`
    if 'renyi' in div_func:
        alpha = float(div_func.split(':')[-1])
        str_div = 'renyi' + str(alpha)
    elif div_func == 'kl':
        str_div = 'kl'
    else:
        raise ValueError("div_func must be 'kl' or 'renyi:<alpha>'; got %r" % (div_func,))

    str_obvs = ''
    if obvs == 'pk':
        str_obvs = '.' + NorS
    str_comp = ''
    if 'GMM' in diver:
        str_comp = '.ncomp' + str(n_comp_max)

    f_dat = ''.join([
        UT.dat_dir(), 'diverg/', 'diverg.', obvs, str_obvs, '.', diver,
        '.K', str(K), str_comp, '.Nref', str(Nref), '.', str_div, '.dat'
    ])
    if not os.path.isfile(f_dat):
        print('-- writing to -- \n %s' % f_dat)
        # create the empty file right away (an unwritable path then fails
        # before the expensive fits) and release the handle immediately
        with open(f_dat, 'w'):
            pass
    else:
        print('-- appending to -- \n %s' % f_dat)

    def _best_gmm(data):
        # fit GMMs with 1..n_comp_max components and keep the lowest-BIC one
        gmms, bics = [], []
        for i_comp in range(1, n_comp_max + 1):
            gmm = GMix(n_components=i_comp)
            gmm.fit(data)
            gmms.append(gmm)
            bics.append(gmm.bic(data))
        return gmms[np.array(bics).argmin()]

    def _cv_kde(data):
        # KDE with bandwidth picked by 10-fold cross-validation; prints the fit time
        t0 = time.time()
        grid = GridSearchCV(skKDE(), {'bandwidth': np.linspace(0.1, 1.0, 30)},
                            cv=10, n_jobs=njobs)
        grid.fit(data)
        print('%f sec' % (time.time() - t0))
        return grid.best_estimator_

    # read in mock data X
    if obvs == 'pk':
        X_mock = NG.X_pk_all(pk_mock, NorS=NorS, sys='fc')
    else:  # obvs == 'gmf'
        X_mock = NG.X_gmf_all()
        if n_mocks is not None:
            X_mock = X_mock[:n_mocks]
    n_mock = X_mock.shape[0]  # number of mocks
    print("%i mocks" % n_mock)

    X_mock_meansub, _ = NG.meansub(X_mock)  # mean subtract
    X_w, _ = NG.whiten(X_mock_meansub)
    if '_ICA' in diver:
        X_ica, W_ica = NG.Ica(X_w)  # ICA transformation
        W_ica_inv = sp.linalg.pinv(W_ica.T)
    elif '_parICA' in diver:
        # FastICA transformation using parallel algorithm
        X_ica, W_ica = NG.Ica(X_w, algorithm='parallel')
        W_ica_inv = sp.linalg.pinv(W_ica.T)

    # build only the density model that `diver` needs
    if diver in ['pX_gauss', 'ref']:
        C_X = np.cov(X_w.T)  # covariance matrix
    elif diver in ['pX_gauss_hartlap']:
        C_X = np.cov(X_w.T)  # covariance matrix
        f_hartlap = (n_mock - float(X_mock.shape[1]) - 2.) / (n_mock - 1.)
        print("hartlap factor = %f" % f_hartlap)
        C_X = C_X / f_hartlap  # scale covariance matrix by hartlap factor
    elif diver in ['pX_GMM', 'pX_GMM_ref']:  # p(mock X) GMM
        kern_gmm = _best_gmm(X_w)
    elif diver in ['pX_KDE', 'pX_KDE_ref']:  # p(mock X) KDE
        kern_kde = _cv_kde(X_w)
    elif diver in ['pX_scottKDE', 'pX_scottKDE_ref']:  # p(mock X) KDE
        # calculate Scott's Rule KDE
        t0 = time.time()
        kern_kde = UT.KayDE(X_w)
        print('%f sec' % (time.time() - t0))
    elif diver in ['pXi_ICA_GMM', 'pXi_ICA_GMM_ref',
                   'pXi_parICA_GMM', 'pXi_parICA_GMM_ref']:
        # PI p(X^i_ICA) GMM
        kern_gmm_ica = [_best_gmm(X_ica[:, ibin][:, None])
                        for ibin in range(X_ica.shape[1])]
    elif diver in ['pXi_ICA_KDE', 'pXi_ICA_KDE_ref',
                   'pXi_parICA_KDE', 'pXi_parICA_KDE_ref']:
        # PI p(X^i_ICA) KDE
        kern_kde_ica = [_cv_kde(X_ica[:, ibin][:, None])
                        for ibin in range(X_ica.shape[1])]
    elif diver in ['pXi_ICA_scottKDE', 'pXi_ICA_scottKDE_ref',
                   'pXi_parICA_scottKDE', 'pXi_parICA_scottKDE_ref']:
        # PI p(X^i_ICA) KDE
        kern_kde_ica = [UT.KayDE(X_ica[:, ibin])
                        for ibin in range(X_ica.shape[1])]

    # calculate the divergences now
    for i in range(n_mc):
        print('%i montecarlo' % i)
        if diver in ['pX_gauss', 'pX_gauss_hartlap']:
            # estimate divergence between gmfs_white and a
            # Gaussian distribution described by C_gmf
            div_i = NG.kNNdiv_gauss(X_w, C_X, Knn=K, div_func=div_func,
                                    Nref=Nref, njobs=njobs)
        elif diver == 'ref':
            # reference divergence in order to showcase the estimator's scatter
            # Gaussian distribution described by C_gmf with same n_mock mocks
            gauss = mvn(np.zeros(X_mock.shape[1]), C_X, size=n_mock)
            div_i = NG.kNNdiv_gauss(gauss, C_X, Knn=K, div_func=div_func,
                                    Nref=Nref, njobs=njobs)
        elif diver == 'pX_GMM':  # D( mock X || p(X) GMM)
            div_i = NG.kNNdiv_Kernel(X_w, kern_gmm, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False, njobs=njobs)
        elif diver == 'pX_GMM_ref':  # D( sample from p(X) GMM || p(X) GMM)
            samp = kern_gmm.sample(n_mock)
            div_i = NG.kNNdiv_Kernel(samp[0], kern_gmm, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False, njobs=njobs)
        elif diver in ['pX_KDE', 'pX_scottKDE']:  # D( mock X || p(X) KDE)
            div_i = NG.kNNdiv_Kernel(X_w, kern_kde, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False, njobs=njobs)
        elif diver in ['pX_KDE_ref', 'pX_scottKDE_ref']:
            # D( sample from p(X) KDE || p(X) KDE)
            samp = kern_kde.sample(n_mock)
            div_i = NG.kNNdiv_Kernel(samp, kern_kde, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=False, njobs=njobs)
        elif diver in ['pXi_ICA_GMM', 'pXi_parICA_GMM']:
            # D( mock X || PI p(X^i_ICA) GMM),
            div_i = NG.kNNdiv_Kernel(X_w, kern_gmm_ica, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=True, njobs=njobs,
                                     W_ica_inv=W_ica_inv)
        elif diver in ['pXi_ICA_GMM_ref', 'pXi_parICA_GMM_ref']:
            # D( ref. sample || PI p(X^i_ICA) GMM),
            samp = np.zeros((n_mock, X_ica.shape[1]))
            for icomp in range(X_ica.shape[1]):
                samp_i = kern_gmm_ica[icomp].sample(n_mock)
                samp[:, icomp] = samp_i[0].flatten()
            samp = np.dot(samp, W_ica_inv.T)
            div_i = NG.kNNdiv_Kernel(samp, kern_gmm_ica, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=True, njobs=njobs,
                                     W_ica_inv=W_ica_inv)
        elif diver in ['pXi_ICA_KDE', 'pXi_ICA_scottKDE',
                       'pXi_parICA_KDE', 'pXi_parICA_scottKDE']:
            # D( mock X || PI p(X^i_ICA) KDE),
            div_i = NG.kNNdiv_Kernel(X_w, kern_kde_ica, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=True, njobs=njobs,
                                     W_ica_inv=W_ica_inv)
        elif diver in ['pXi_ICA_KDE_ref', 'pXi_ICA_scottKDE_ref',
                       'pXi_parICA_KDE_ref', 'pXi_parICA_scottKDE_ref']:
            # D( ref sample || PI p(X^i_ICA) KDE),
            samp = np.zeros((n_mock, X_ica.shape[1]))
            for icomp in range(X_ica.shape[1]):
                samp_i = kern_kde_ica[icomp].sample(n_mock)
                samp[:, icomp] = samp_i.flatten()
            samp = np.dot(samp, W_ica_inv.T)
            div_i = NG.kNNdiv_Kernel(samp, kern_kde_ica, Knn=K, div_func=div_func,
                                     Nref=Nref, compwise=True, njobs=njobs,
                                     W_ica_inv=W_ica_inv)
        print(div_i)
        # append per iteration so that finished estimates survive an interrupted run
        with open(f_dat, 'a') as f_out:
            f_out.write('%f \n' % div_i)
    return None
def W_importance(tag, chain, ica_algorithm=None, density_method='kde', n_comp_max=20,
                 info_crit='bic', njobs=1, **kwargs):
    ''' Given a dictionary with the MCMC chain, evaluate the likelihood ratio

    Parameters
    ----------
    tag : str
        Selects the analysis. Supported values are
        'RSD_pXiICA_gauss', 'RSD_ica_chi2' (these require `zbin` in kwargs),
        'gmf_all_chi2', 'gmf_pXiICA_chi2', 'gmf_pX_chi2' and 'gmf_lowN_chi2'.
    chain : dict
        MCMC chain. The RSD tags read chain['pk_ngc'] and chain['pk_sgc']
        ('RSD_ica_chi2' also reads chain['chi2']); the gmf tags read
        chain['gmf'] and chain['chi2'].
    ica_algorithm, density_method, n_comp_max, info_crit, njobs
        Forwarded to NG.lnL_pXi_ICA / NG.lnL_pX.
    **kwargs
        `zbin` is required for the RSD tags.

    Returns
    -------
    list
        [lnP_den, lnP_num, ws] with ws = exp(lnP_num - lnP_den).

    Raises
    ------
    ValueError
        If `tag` contains neither 'RSD' nor 'gmf', or an RSD tag is used
        without `zbin`.
    NotImplementedError
        If `tag` belongs to one of the two families but is not supported.
    '''
    if 'RSD' in tag:  # Florian's RSD analysis
        if 'zbin' not in kwargs:
            raise ValueError('specify zbin in kwargs')
        # an unsupported RSD tag used to fall through to a NameError on
        # `lnP_num`; reject it before any data is read
        if tag not in ('RSD_pXiICA_gauss', 'RSD_ica_chi2'):
            raise NotImplementedError
        zbin = kwargs['zbin']

        # read in BOSS P(k) (data D)
        pk_ngc_data, pk_sgc_data = [], []
        pkay = Dat.Pk()
        for ell in [0, 2, 4]:
            _, plk_ngc = pkay.Observation(ell, zbin, 'ngc')
            _, plk_sgc = pkay.Observation(ell, zbin, 'sgc')
            pk_ngc_data.append(plk_ngc)
            pk_sgc_data.append(plk_sgc)
        pk_ngc_data = np.concatenate(pk_ngc_data)
        pk_sgc_data = np.concatenate(pk_sgc_data)

        # calculate D - m(theta) for all the mcmc chain
        # NOTE(review): this is chain - data, whereas the gmf branch below
        # uses data - chain; confirm the sign convention expected by
        # NG.lnL_pXi_ICA.
        delta_ngc = chain['pk_ngc'] - pk_ngc_data
        delta_sgc = chain['pk_sgc'] - pk_sgc_data

        # import PATCHY mocks
        pk_ngc_list, pk_sgc_list = [], []
        for ell in [0, 2, 4]:
            kmax = 0.1 if ell == 4 else 0.15
            pk_ngc_list.append(
                NG.X_pk('patchy.z' + str(zbin), krange=[0.01, kmax],
                        ell=ell, NorS='ngc', sys='fc'))
            pk_sgc_list.append(
                NG.X_pk('patchy.z' + str(zbin), krange=[0.01, kmax],
                        ell=ell, NorS='sgc', sys='fc'))
        pk_ngc_mock = np.concatenate(pk_ngc_list, axis=1)
        pk_sgc_mock = np.concatenate(pk_sgc_list, axis=1)

        # both RSD tags share the ICA likelihood as the numerator
        lnP_ica_ngc = NG.lnL_pXi_ICA(delta_ngc, pk_ngc_mock,
                                     ica_algorithm=ica_algorithm,
                                     density_method=density_method,
                                     n_comp_max=n_comp_max,
                                     info_crit=info_crit, njobs=njobs)
        lnP_ica_sgc = NG.lnL_pXi_ICA(delta_sgc, pk_sgc_mock,
                                     ica_algorithm=ica_algorithm,
                                     density_method=density_method,
                                     n_comp_max=n_comp_max,
                                     info_crit=info_crit, njobs=njobs)
        lnP_num = lnP_ica_ngc + lnP_ica_sgc

        if tag == 'RSD_pXiICA_gauss':
            # P_ICA(D - m(theta)) / P_PCA,Gauss(D - m(theta))
            lnP_gauss_ngc = NG.lnL_pca_gauss(delta_ngc, pk_ngc_mock)
            lnP_gauss_sgc = NG.lnL_pca_gauss(delta_sgc, pk_sgc_mock)
            lnP_den = lnP_gauss_ngc + lnP_gauss_sgc
        else:  # 'RSD_ica_chi2' -- this should be consistent with above!
            lnP_den = -0.5 * chain['chi2']

    elif 'gmf' in tag:  # GMF
        if tag not in ('gmf_all_chi2', 'gmf_pXiICA_chi2',
                       'gmf_pX_chi2', 'gmf_lowN_chi2'):
            raise NotImplementedError

        def _lnL_chi2(delta, Cinv):
            # -0.5 * delta_i^T Cinv delta_i for every row i of delta
            out = np.empty(delta.shape[0])
            for i in range(delta.shape[0]):
                out[i] = -0.5 * np.dot(delta[i, :], np.dot(Cinv, delta[i, :].T))
            return out

        geemf = Dat.Gmf()  # read in SDSS GMF (data D)
        _, gmf_data = geemf.Observation()

        # calculate D - m(theta) for all the mcmc chain
        dgmf = gmf_data - chain['gmf']

        # read mock gmfs (all mocks from 100 different HOD parameter points)
        gmf_mock = NG.X_gmf_all()

        # old likelihood derived from chi-squared of MCMC chain
        lnP_den = -0.5 * chain['chi2']  # -0.5 chi-squared from MCMC chain

        # The four branches are mutually exclusive. They must form a single
        # if/elif chain: with a separate leading `if`, 'gmf_all_chi2' fell
        # through to the final `else` and always raised NotImplementedError.
        if tag == 'gmf_all_chi2':
            # importance weight determined by the ratio of
            # the chi^2 from the chain and the chi^2 calculated
            # using the covariance matrix from the entire catalog
            # we note that Sinha et al.(2017) does not include the
            # hartlap factor
            Cgmf = np.cov(gmf_mock.T)  # covariance matrix
            Cinv = np.linalg.inv(Cgmf)  # precision matrix
            lnP_num = _lnL_chi2(dgmf, Cinv)  # updated chi-square
        elif tag == 'gmf_pXiICA_chi2':
            # updated likelihood is calculated using
            # ln( PI_i p( delta_X_ICA_i | X_ICA_i^(gmm/kde)) )
            lnP_num = NG.lnL_pXi_ICA(dgmf, gmf_mock,
                                     ica_algorithm=ica_algorithm,
                                     density_method=density_method,
                                     n_comp_max=n_comp_max,
                                     info_crit=info_crit, njobs=njobs)
        elif tag == 'gmf_pX_chi2':
            # updated likelihood is calculated using
            # ln( p( delta_X | X^(gmm/kde) ) )
            lnP_num = NG.lnL_pX(dgmf, gmf_mock,
                                density_method=density_method,
                                n_comp_max=n_comp_max,
                                info_crit=info_crit, njobs=njobs)
        else:  # 'gmf_lowN_chi2'
            # importance weight determined by the ratio of
            # the chi^2 from the chain and the chi^2 calculated
            # using the covariance matrix from the entire catalog
            # and *excluding the highest N bin*
            Cgmf = np.cov(gmf_mock.T)  # covariance matrix
            Cinv = np.linalg.inv(Cgmf)  # precision matrix
            Nlim = Cinv.shape[0] - 1
            # NOTE(review): this slices the precision matrix of the full
            # covariance rather than inverting the truncated covariance;
            # confirm that this is intended.
            lnP_num = _lnL_chi2(dgmf[:, :Nlim], Cinv[:Nlim, :Nlim])
    else:
        raise ValueError("tag must contain 'RSD' or 'gmf'; got %r" % (tag,))

    ws = np.exp(lnP_num - lnP_den)
    return [lnP_den, lnP_num, ws]