def test_z_mc4():
    """Check the mean and spread of Monte Carlo draws from Gaussian pdfs.

    For N Gaussians with random centres in [0.4, 1.9) and random widths in
    [0.1, 0.2), draw 100000 values with ``pval.get_mc`` and check that

    * the standard deviation of the draws matches the input sigma,
    * the mean of the draws matches the input centre,
    * ``pval.sigma_95`` of the draws matches twice the input sigma.
    """

    def gauss_pdf(grid, mu, sigma):
        # Same formula as the removed matplotlib.mlab.normpdf.
        return (np.exp(-0.5 * ((grid - mu) / sigma) ** 2)
                / (sigma * np.sqrt(2 * np.pi)))

    N = 3
    n_draws = 100000
    z_arr = np.random.uniform(size=N) * 1.5 + 0.4
    z_sigma = np.random.uniform(size=N) * 0.1 + 0.1
    x = np.arange(0.1, 3.01, 0.01)
    # Half a grid step, printed next to the residuals for scale.
    dx = (x[1] - x[0]) / 2.0

    z_mc_std = np.zeros(N)
    z_mc_mean = np.zeros(N)
    z_mc_95 = np.zeros(N)

    for i in range(N):
        pdf = gauss_pdf(x, z_arr[i], z_sigma[i])
        z_mc = np.array([pval.get_mc(pdf, x) for _ in range(n_draws)])
        z_mc_std[i] = np.std(z_mc)
        z_mc_mean[i] = np.mean(z_mc)
        z_mc_95[i] = pval.sigma_95(z_mc)

    # Single-argument print calls behave the same on Python 2 and 3.
    print('diff sig {} {}'.format(z_sigma - z_mc_std, dx))
    print('diff mean {} {}'.format(z_arr - z_mc_mean, dx))
    print('diff 2xsig {} {}'.format(z_mc_95 - z_mc_std * 2, dx))

    for i in range(N):
        np.testing.assert_almost_equal(z_sigma[i] - z_mc_std[i], 0, decimal=3)
        np.testing.assert_almost_equal(z_arr[i] - z_mc_mean[i], 0, decimal=3)
        np.testing.assert_almost_equal(2 * z_sigma[i] - z_mc_95[i], 0,
                                       decimal=2)
def test_z_mc1():
    """Recover the input redshift from draws of one fixed Gaussian pdf.

    Every galaxy has centre 0.6 and sigma 0.1. One value per galaxy is drawn
    with ``pval.get_mc`` and the median of the draws must agree with the
    median of the input centres to 3 decimals.
    """

    def gauss_pdf(grid, mu, sigma):
        # Same formula as the removed matplotlib.mlab.normpdf.
        return (np.exp(-0.5 * ((grid - mu) / sigma) ** 2)
                / (sigma * np.sqrt(2 * np.pi)))

    N = 1000000
    z_arr = np.array([0.6] * N)
    z_sigma = np.array([0.1] * N)
    x = np.arange(0.1, 3.01, 0.01)
    # Half a grid step, printed next to the residual for scale.
    dx = (x[1] - x[0]) / 2.0

    # Centre and width are the same for every galaxy, so the pdf is built
    # once instead of once per draw.
    pdf = gauss_pdf(x, z_arr[0], z_sigma[0])

    z_mc = np.zeros(N)
    for i in range(N):
        z_mc[i] = pval.get_mc(pdf, x)

    diff = np.median(z_mc) - np.median(z_arr)
    # Single-argument print call behaves the same on Python 2 and 3.
    print('{} {}'.format(diff, dx))
    np.testing.assert_almost_equal(diff, 0, decimal=3)
def test_z_mc3():
    """Recover the average redshift from Gaussians with random centres/widths.

    Centres are uniform in [0.4, 1.9) and sigmas uniform in [0.1, 0.2). One
    value per galaxy is drawn with ``pval.get_mc``; the median of the draws
    must agree with the sigma-weighted average of the centres to 3 decimals.
    """

    def gauss_pdf(grid, mu, sigma):
        # Same formula as the removed matplotlib.mlab.normpdf.
        return (np.exp(-0.5 * ((grid - mu) / sigma) ** 2)
                / (sigma * np.sqrt(2 * np.pi)))

    N = 1000000
    z_arr = np.random.uniform(size=N) * 1.5 + 0.4
    z_sigma = np.random.uniform(size=N) * 0.1 + 0.1
    x = np.arange(0.1, 3.01, 0.01)
    # Half a grid step, printed next to the residual for scale.
    dx = (x[1] - x[0]) / 2.0

    z_mc = np.zeros(N)
    for i in range(N):
        pdf = gauss_pdf(x, z_arr[i], z_sigma[i])
        z_mc[i] = pval.get_mc(pdf, x)

    # Print exactly the quantity that is asserted below (the median-based
    # residual), so the diagnostic matches the check.
    diff = np.median(z_mc) - np.average(z_arr, weights=z_sigma)
    print('{} {}'.format(diff, dx))
    np.testing.assert_almost_equal(diff, 0, decimal=3)
def test_z_mc2():
    """Recover the average redshift for random centres and a fixed sigma.

    Centres are uniform in [0.4, 1.9) and every sigma is 0.1. One value per
    galaxy is drawn with ``pval.get_mc``; the median of the draws must agree
    with the sigma-weighted average of the centres to 4 decimals.
    """

    def gauss_pdf(grid, mu, sigma):
        # Same formula as the removed matplotlib.mlab.normpdf.
        return (np.exp(-0.5 * ((grid - mu) / sigma) ** 2)
                / (sigma * np.sqrt(2 * np.pi)))

    N = 100000
    z_arr = np.random.uniform(size=N) * 1.5 + 0.4
    z_sigma = np.array([0.1] * N)
    x = np.arange(0.1, 3.01, 0.01)
    # Half a grid step, printed next to the residual for scale.
    dx = (x[1] - x[0]) / 2.0

    z_mc = np.zeros(N)
    for i in range(N):
        pdf = gauss_pdf(x, z_arr[i], z_sigma[i])
        z_mc[i] = pval.get_mc(pdf, x)

    diff = np.median(z_mc) - np.average(z_arr, weights=z_sigma)
    # Single-argument print call behaves the same on Python 2 and 3.
    print('{} {}'.format(diff, dx))
    np.testing.assert_almost_equal(diff, 0, decimal=4)
def test_cumaltive_to_point0():
    """Check that draws from a pdf give a flat histogram of cumulative values.

    For 20 narrow Gaussians (sigma 0.02) with random centres in [0.1, 0.6),
    draw 100000 values with ``pval.get_mc``, map them through
    ``pval.cumaltive_to_point`` and histogram the result in 20 bins on
    [0, 1]. The relative entropy between the bin counts and a flat histogram
    must be below 0.005 (e.g. the Bordoloi test).
    """
    from scipy.stats import entropy

    def gauss_pdf(grid, mu, sigma):
        # Same formula as the removed matplotlib.mlab.normpdf.
        return (np.exp(-0.5 * ((grid - mu) / sigma) ** 2)
                / (sigma * np.sqrt(2 * np.pi)))

    ngals = 20
    # These random values are used as the Gaussian centres; the printed
    # label 'sigs' is kept from the original diagnostic output.
    pdf_means = np.random.uniform(size=ngals) * 0.5 + 0.1
    x = np.arange(0.01, 3.01, 0.01)
    hist_edges = np.arange(0, 1.05, 0.05)

    print('sigs {}'.format(pdf_means))

    for i in range(ngals):
        pdf = gauss_pdf(x, pdf_means[i], 0.02)
        z_mc = pval.get_mc(pdf, x, N=100000)
        c = pval.cumaltive_to_point(pdf, x, z_mc)

        h = np.histogram(c, bins=hist_edges)
        print(h)
        print('{} len(h)'.format(len(h)))

        # Keep only the counts; np.histogram returns (counts, edges).
        h = h[0]
        res = entropy(h, [np.mean(h)] * len(h))
        print('res {}'.format(res))
        np.testing.assert_array_less(res, 0.005)
def test_cumaltive_to_point01():
    """Check flat cumulative histograms for pdfs with only two non-zero bins.

    For every pair of adjacent grid points, starting at each of the first
    ``len(x) - 2`` indices, build a pdf that is 0.1 and 0.2 on those two
    points and zero elsewhere.
    Draw 100000 values with ``pval.get_mc``, map them through
    ``pval.cumaltive_to_point`` and histogram the result in 20 bins on
    [0, 1]. The relative entropy between the bin counts and a flat histogram
    must be below 0.005 (e.g. the Bordoloi test).
    """
    from scipy.stats import entropy

    x = np.arange(0.01, 3.01, 0.01)
    hist_edges = np.arange(0, 1.05, 0.05)

    for i in range(len(x) - 2):
        pdf = np.zeros_like(x)
        pdf[i:i + 2] = [0.1, 0.2]

        z_mc = pval.get_mc(pdf, x, N=100000)
        c = pval.cumaltive_to_point(pdf, x, z_mc)

        h = np.histogram(c, bins=hist_edges)
        # Single-argument print calls behave the same on Python 2 and 3.
        print(h)
        print('{} len(h)'.format(len(h)))

        # Keep only the counts; np.histogram returns (counts, edges).
        h = h[0]
        res = entropy(h, [np.mean(h)] * len(h))
        print('res {}'.format(res))
        np.testing.assert_array_less(res, 0.005)
def parr_loop(lst):
    """Compute redshift summary statistics for one chunk of galaxies.

    Parallelisation loop for joblib: each call handles one chunk and returns
    a dictionary of per-galaxy arrays for later combination.

    Parameters
    ----------
    lst : sequence
        Eight items, unpacked in this order:

        ind_ : per-galaxy identifiers for this chunk (returned unchanged;
            only its length is used here).
        f_obs_ : observed fluxes, indexed by galaxy.
        ef_obs_ : errors on the observed fluxes, indexed by galaxy.
        prior_mag_ : per-galaxy magnitude used to pick the prior.
        f_mod : template fluxes, indexed [redshift, template, band].
        gal_mag_type_prior : mapping keyed by magnitude;
            ``gal_mag_type_prior[mag][j]`` is the prior assigned to
            template ``j``.
        z_bins : redshift grid matching the first axis of ``f_mod``.
        config : mapping; ``'output_pdfs'`` is read, ``'verbose'`` is
            optional and ``'n_jobs'`` is read only when verbose.

    Returns
    -------
    dict
        Keys ``'ind'``, ``'mean'``, ``'sigma'``, ``'median'``, ``'sig68'``,
        ``'mode'``, ``'z_minchi2'``, ``'KL_post_prior'``, ``'pdfs_'``,
        ``'mc'``, ``'min_chi2'``, ``'max_z_marg_likelihood'`` and
        ``'maxL_template_ind'``. ``'pdfs_'`` is ``None`` unless
        ``config['output_pdfs']`` is truthy; all other values except
        ``'ind'`` are arrays of length ``len(ind_)``.
    """
    from scipy.stats import entropy

    (ind_, f_obs_, ef_obs_, prior_mag_, f_mod, gal_mag_type_prior, z_bins,
     config) = lst

    n_gals = len(ind_)

    # Some small value to truncate probabilities: it guards the division
    # below and caps the chi2 difference fed to the exponential.
    eps = 1e-300
    eeps = np.log(eps)

    # Results arrays for this loop, NaN until filled.
    mean = np.zeros(n_gals) + np.nan
    z_minchi2 = np.zeros(n_gals) + np.nan
    sigma = np.zeros(n_gals) + np.nan
    median = np.zeros(n_gals) + np.nan
    max_z_marg_likelihood = np.zeros(n_gals) + np.nan
    mode = np.zeros(n_gals) + np.nan
    mc = np.zeros(n_gals) + np.nan
    sig68 = np.zeros(n_gals) + np.nan
    KL_post_prior = np.zeros(n_gals) + np.nan
    min_chi2_arr = np.zeros(n_gals) + np.nan
    # -1000 marks "no template chosen".
    maxL_template_ind = np.zeros(n_gals, dtype=int) - 1000

    output_pdfs = config['output_pdfs']
    pdfs_ = None
    if output_pdfs:
        pdfs_ = np.zeros((n_gals, len(z_bins))) + np.nan

    # Loop invariants: band/template counts and the magnitude grid of the
    # prior do not depend on the galaxy.
    nf = len(f_mod[0, 0, :])
    n_templates = len(f_mod[0, :, 0])
    prior_mags = list(gal_mag_type_prior.keys())
    prior_mag_grid = np.array(prior_mags)

    for i in range(n_gals):
        f = f_obs_[i].reshape(1, 1, nf)
        ef = ef_obs_[i].reshape(1, 1, nf)

        # chi2 per (redshift, template): foo - fot^2 / ftt is the chi2
        # minimised over a free amplitude of the template.
        foo = np.sum(np.power(f_obs_[i] / ef_obs_[i], 2))
        # These two sums are the slow part of the code.
        fot = np.sum(np.divide(f * f_mod, np.power(ef, 2)), axis=2)
        ftt = np.sum(np.power(np.divide(f_mod, ef), 2), axis=2)
        chi2 = foo - np.power(fot, 2) / (ftt + eps)

        ind_mchi2 = np.where(chi2 == np.amin(chi2))
        min_chi2 = chi2[ind_mchi2][0]
        min_chi2_arr[i] = min_chi2
        z_minchi2[i] = z_bins[ind_mchi2[0]][0]

        # Likelihood relative to the best fit; the clip keeps the exponent
        # within [log(eps), 0].
        likelihood = np.exp(-0.5 * np.clip(chi2 - min_chi2, 0., -2 * eeps))

        # Use the prior tabulated at the magnitude closest to this galaxy.
        ind_mag_p = np.argmin(np.abs(prior_mag_[i] - prior_mag_grid))
        type_prior = gal_mag_type_prior[prior_mags[ind_mag_p]]
        prior = np.zeros_like(likelihood)
        for j in range(n_templates):
            prior[:, j] = type_prior[j]

        # Posterior is prior * likelihood.
        posterior = prior * likelihood

        # Template index (second axis) of the maximum posterior; if several
        # cells tie, choose one at random. Store a scalar in both cases:
        # assigning a length-1 array to an array element is deprecated in
        # recent NumPy.
        ind_max = np.where(posterior == np.amax(posterior))[1]
        if len(ind_max) > 1:
            maxL_template_ind[i] = np.random.choice(ind_max)
        else:
            maxL_template_ind[i] = ind_max[0]

        # Marginalise over templates in prior, posterior and likelihood.
        # NOTE: a posterior that sums to zero yields NaNs here.
        marg_post = np.sum(posterior, axis=1)
        marg_post /= np.sum(marg_post)

        marg_prior = np.sum(prior, axis=1)
        marg_prior /= np.sum(marg_prior)

        marg_likelihood = np.sum(likelihood, axis=1)
        marg_likelihood /= np.sum(marg_likelihood)

        max_z_marg_likelihood[i] = z_bins[np.where(
            marg_likelihood == np.amax(marg_likelihood))[0][0]]

        KL_post_prior[i] = entropy(marg_post, marg_prior)

        ind_max_marg = np.where(marg_post == np.amax(marg_post))[0][0]

        # Summary statistics from the marginalised posterior.
        mean[i] = pval.get_mean(marg_post, z_bins)
        sigma[i] = pval.get_sig(marg_post, z_bins)
        median[i] = pval.get_median(marg_post, z_bins)
        mc[i] = pval.get_mc(marg_post, z_bins)
        sig68[i] = pval.get_sig68(marg_post, z_bins)
        mode[i] = z_bins[ind_max_marg]

        if output_pdfs:
            pdfs_[i] = marg_post

    verbose = False
    if key_not_none(config, 'verbose'):
        verbose = config['verbose']
    if verbose:
        print('loop complete', config['n_jobs'])

    return {
        'ind': ind_,
        'mean': mean,
        'sigma': sigma,
        'median': median,
        'sig68': sig68,
        'mode': mode,
        'z_minchi2': z_minchi2,
        'KL_post_prior': KL_post_prior,
        'pdfs_': pdfs_,
        'mc': mc,
        'min_chi2': min_chi2_arr,
        'max_z_marg_likelihood': max_z_marg_likelihood,
        'maxL_template_ind': maxL_template_ind
    }