Example #1
import numpy as np
from scipy.stats import norm  # norm.pdf stands in for the removed matplotlib.mlab.normpdf
import pval  # the module under test (get_mc, sigma_95, cumaltive_to_point, ...); assumed importable


def test_z_mc4():
    """test properties of z_mc: are the recovered STD and 2*STD correct?"""
    N = 3
    z_arr = np.random.uniform(size=N) * 1.5 + 0.4
    z_sigma = np.random.uniform(size=N) * 0.1 + 0.1
    x = np.arange(0.1, 3.01, 0.01)
    z_mc_std = np.zeros(N)
    z_mc_mean = np.zeros(N)
    z_mc_95 = np.zeros(N)
    for i in range(N):
        pdf = norm.pdf(x, z_arr[i], z_sigma[i])
        z_mc = np.array([pval.get_mc(pdf, x) for _ in range(100000)])
        z_mc_std[i] = np.std(z_mc)
        z_mc_mean[i] = np.mean(z_mc)
        z_mc_95[i] = pval.sigma_95(z_mc)
    dx = (x[1] - x[0]) / 2.0
    print('diff sig', z_sigma - z_mc_std, dx)
    print('diff mean', z_arr - z_mc_mean, dx)
    print('diff 2xsig', z_mc_95 - z_mc_std * 2, dx)
    for i in range(N):
        np.testing.assert_almost_equal(z_sigma[i] - z_mc_std[i], 0, decimal=3)
        np.testing.assert_almost_equal(z_arr[i] - z_mc_mean[i], 0, decimal=3)
        np.testing.assert_almost_equal(2 * z_sigma[i] - z_mc_95[i],
                                       0,
                                       decimal=2)
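
These tests treat pval.get_mc as a black box that draws one Monte Carlo sample from a pdf tabulated on the grid x. Its implementation is not shown here; the sketch below illustrates the standard inverse-CDF draw such a function could perform (the name get_mc_sketch and its N keyword are illustrative only, not the library's code).

import numpy as np

def get_mc_sketch(pdf, x, N=1):
    """Illustrative stand-in for pval.get_mc: draw N samples from a
    tabulated pdf by inverting its cumulative distribution."""
    cdf = np.cumsum(pdf)
    cdf = cdf / cdf[-1]                # normalise so the CDF ends at 1
    u = np.random.uniform(size=N)      # uniform draws in [0, 1)
    draws = np.interp(u, cdf, x)       # invert the CDF by interpolation
    return draws[0] if N == 1 else draws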
Example #2
def test_z_mc1():
    """test can recover <z_mc> for fixed gaussian sigma, and fixed mean """
    N = 1000000
    z_arr = np.array([0.6] * N)  #np.random.uniform(size=N)*1.5 + 0.4
    z_sigma = np.array([0.1] * N)  #np.random.uniform(size=N)*0.1 + 0.1
    x = np.arange(0.1, 3.01, 0.01)

    z_mc = np.zeros(N)
    for i in range(N):
        pdf = norm.pdf(x, z_arr[i], z_sigma[i])
        z_mc[i] = pval.get_mc(pdf, x)

    print(np.median(z_mc) - np.median(z_arr), (x[1] - x[0]) / 2.0)
    np.testing.assert_almost_equal(np.median(z_mc) - np.median(z_arr),
                                   0,
                                   decimal=3)
Example #3
def test_z_mc3():
    """test get <z_mc> from varying gaussians with random centers and sigmas"""
    N = 1000000
    z_arr = np.random.uniform(size=N) * 1.5 + 0.4
    z_sigma = np.random.uniform(size=N) * 0.1 + 0.1
    x = np.arange(0.1, 3.01, 0.01)
    dx = (x[1] - x[0]) / 2.0
    z_mc = np.zeros(N)
    for i in range(N):
        pdf = norm.pdf(x, z_arr[i], z_sigma[i])
        z_mc[i] = pval.get_mc(pdf, x)

    print(np.median(z_mc) - np.average(z_arr, weights=z_sigma), dx)
    np.testing.assert_almost_equal(np.median(z_mc) -
                                   np.average(z_arr, weights=z_sigma),
                                   0,
                                   decimal=3)
Example #4
def test_z_mc2():
    """test can get <z_mc> for a range of z_mean, and fixed sigma"""
    N = 100000
    z_arr = np.random.uniform(size=N) * 1.5 + 0.4
    z_sigma = np.array([0.1] * N)
    x = np.arange(0.1, 3.01, 0.01)

    z_mc = np.zeros(N)
    for i in range(N):
        pdf = norm.pdf(x, z_arr[i], z_sigma[i])
        z_mc[i] = pval.get_mc(pdf, x)

    print(np.median(z_mc) - np.average(z_arr, weights=z_sigma),
          (x[1] - x[0]) / 2.0)
    np.testing.assert_almost_equal(np.median(z_mc) -
                                   np.average(z_arr, weights=z_sigma),
                                   0,
                                   decimal=4)
Example #5
def test_cumaltive_to_point0():
    """check we can we draw a set of z_mc from a pdf, that are flat in histogram heights <h>
    e.g. Bordoloi test"""
    from scipy.stats import entropy
    ngals = 20
    sigs = np.random.uniform(size=ngals) * 0.5 + 0.1
    x = np.arange(0.01, 3.01, 0.01)
    dx = (x[1] - x[0]) / 2.0
    print('sigs', sigs)

    for i in range(ngals):
        pdf = norm.pdf(x, sigs[i], 0.02)
        z_mc = pval.get_mc(pdf, x, N=100000)
        c = pval.cumaltive_to_point(pdf, x, z_mc)
        h = np.histogram(c, bins=np.arange(0, 1.05, 0.05))
        print(h)
        print(len(h), 'len(h)')
        h = h[0]
        res = entropy(h, [np.mean(h)] * len(h))
        print('res', res)
        np.testing.assert_array_less(res, 0.005)
Example #6
def test_cumaltive_to_point01():
    """check we can we draw a set of z_mc from a pdf, that are flat in histogram heights <h>
    e.g. Bordoloi test for sets of pdfs with 2 bins values """
    from scipy.stats import entropy

    x = np.arange(0.01, 3.01, 0.01)
    dx = (x[1] - x[0]) / 2.0

    for i in range(len(x) - 2):
        pdf = np.zeros_like(x)
        pdf[i:i + 2] = [0.1, 0.2]
        z_mc = pval.get_mc(pdf, x, N=100000)
        c = pval.cumaltive_to_point(pdf, x, z_mc)

        h = np.histogram(c, bins=np.arange(0, 1.05, 0.05))
        print(h)
        print(len(h), 'len(h)')
        h = h[0]
        res = entropy(h, [np.mean(h)] * len(h))
        print('res', res)
        np.testing.assert_array_less(res, 0.005)
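
Examples #5 and #6 are probability integral transform (PIT) checks: if each z_mc is a fair draw from its pdf, then the CDF evaluated at z_mc is uniform on [0, 1], so the histogram heights h should be flat. A minimal sketch of the quantity pval.cumaltive_to_point plausibly returns (illustrative, not the library's code):

import numpy as np

def cumulative_to_point_sketch(pdf, x, points):
    """Illustrative stand-in for pval.cumaltive_to_point: the normalised
    CDF of the tabulated pdf evaluated at each point (the PIT values)."""
    cdf = np.cumsum(pdf)
    cdf = cdf / cdf[-1]
    return np.interp(points, x, cdf)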
Example #7
def parr_loop(lst):
    """parralisation loop for joblib
    lst is a [] containing ind, f_obs_, ef_obs_, prior_mag_, f_mod, gal_mag_type_prior, z_bins, config
    f_mod = tempalate fluxes [redshift, template, band]

    returns a dictionary with
    {'ind': ind_, 'mean': mean, 'sigma': sigma, 'median': median, 'sig68': sig68, 'z_max_post': z_max_post,'z_minchi2': z_minchi2,
            'KL_post_prior': KL_post_prior, 'pdfs_': pdfs_}
    for later combination
    """

    from scipy.stats import entropy

    ind_, f_obs_, ef_obs_, prior_mag_, f_mod, gal_mag_type_prior, z_bins, config = lst

    n_gals = len(ind_)
    #some small value to truncate probs.
    eps = 1e-300
    eeps = np.log(eps)

    #results arrays for this loop
    mean = np.zeros(n_gals) + np.nan
    z_minchi2 = np.zeros(n_gals) + np.nan
    sigma = np.zeros(n_gals) + np.nan
    median = np.zeros(n_gals) + np.nan
    max_z_marg_likelihood = np.zeros(n_gals) + np.nan
    mode = np.zeros(n_gals) + np.nan
    mc = np.zeros(n_gals) + np.nan
    sig68 = np.zeros(n_gals) + np.nan
    KL_post_prior = np.zeros(n_gals) + np.nan
    min_chi2_arr = np.zeros(n_gals) + np.nan
    maxL_template_ind = np.zeros(n_gals, dtype=int) - 1000
    pdfs_ = None
    if config['output_pdfs']:
        pdfs_ = np.zeros((n_gals, len(z_bins))) + np.nan

    for i in np.arange(n_gals):

        foo = np.sum(np.power(f_obs_[i] / ef_obs_[i], 2))

        nf = len(f_mod[0, 0, :])

        f = f_obs_[i].reshape(1, 1, nf)
        ef = ef_obs_[i].reshape(1, 1, nf)

        #this is a slow part of the code!
        fot = np.sum(np.divide(f * f_mod, np.power(ef, 2)), axis=2)

        #this is the slowest part of the code!
        ftt = np.sum(np.power(np.divide(f_mod, ef), 2), axis=2)

        chi2 = foo - np.power(fot, 2) / (ftt + eps)
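        # (The line above is the analytic minimisation over the template
        #  amplitude A: for a model A*f_mod the best-fit A is fot/ftt, giving
        #  chi2 = sum((f/ef)^2) - fot^2/ftt; eps guards against ftt == 0.)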

        ind_mchi2 = np.where(chi2 == np.amin(chi2))

        min_chi2 = chi2[ind_mchi2][0]

        min_chi2_arr[i] = min_chi2

        z_min_chi2 = z_bins[ind_mchi2[0]][0]
        z_minchi2[i] = z_min_chi2

        likelihood = np.exp(-0.5 * np.clip(chi2 - min_chi2, 0., -2 * eeps))
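        # (Clipping chi2 - min_chi2 to [0, -2*ln(eps)] bounds the exponent,
        #  keeping the likelihood within [eps, 1] and avoiding underflow.)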

        prior = np.zeros_like(likelihood)
        pr_mg = list(gal_mag_type_prior.keys())
        ind_mag_p = np.argmin(np.abs(prior_mag_[i] - np.array(pr_mg)))

        for j in np.arange(len(f_mod[0, :, 0])):
            prior[:, j] = gal_mag_type_prior[pr_mg[ind_mag_p]][j]

        #posterior is prior * Likelihood
        posterior = prior * likelihood
        #posterior[posterior<1.e-5/(posterior.shape[0]*posterior.shape[1])] = 0.
        #posterior = likelihood
        # debugging hook (disabled): save the 2d probability surfaces.
        # note str(i) is not the galaxy's ID, because ID info is not passed
        # to this function.
        #sv_name = str(i) + '.prob_2d'
        #np.savez(sv_name, prior=prior, chi2=likelihood, pb=posterior)

        #get the maximum posterior, and determine which template this is
        ind_max = np.where(posterior == np.amax(posterior))[1]

        #if there are several equally good maximum posteriors, choose one at random.
        if len(ind_max) > 1:
            ind_max = np.random.choice(ind_max)
        maxL_template_ind[i] = ind_max

        #marginalise over templates in the prior and posterior:
        marg_post = np.sum(posterior, axis=1)
        #if np.sum(marg_post)==0.:
        #    marg_post += 1./len(marg_post)
        #else:
        marg_post /= np.sum(marg_post)
        marg_prior = np.sum(prior, axis=1)
        marg_prior /= np.sum(marg_prior)

        marg_likelihood = np.sum(likelihood, axis=1)
        marg_likelihood /= np.sum(marg_likelihood)
        max_z_marg_likelihood[i] = z_bins[np.where(
            marg_likelihood == np.amax(marg_likelihood))[0][0]]

        KL_post_prior[i] = entropy(marg_post, marg_prior)

        ind_max_marg = np.where(marg_post == np.amax(marg_post))[0][0]

        #define summary stats from the marginalised posterior.
        mean[i] = pval.get_mean(marg_post, z_bins)
        sigma[i] = pval.get_sig(marg_post, z_bins)
        median[i] = pval.get_median(marg_post, z_bins)
        mc[i] = pval.get_mc(marg_post, z_bins)
        sig68[i] = pval.get_sig68(marg_post, z_bins)
        mode[i] = z_bins[ind_max_marg]

        if config['output_pdfs']:
            pdfs_[i] = marg_post

    verbose = False
    if key_not_none(config, 'verbose'):
        verbose = config['verbose']

    if verbose:
        print('loop complete', config['n_jobs'])

    return {
        'ind': ind_,
        'mean': mean,
        'sigma': sigma,
        'median': median,
        'sig68': sig68,
        'mode': mode,
        'z_minchi2': z_minchi2,
        'KL_post_prior': KL_post_prior,
        'pdfs_': pdfs_,
        'mc': mc,
        'min_chi2': min_chi2_arr,
        'max_z_marg_likelihood': max_z_marg_likelihood,
        'maxL_template_ind': maxL_template_ind
    }
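
A minimal sketch of how parr_loop might be driven with joblib, splitting the galaxies into one chunk per job; the driver function, its name, and the chunking are illustrative assumptions, not taken from the source.

import numpy as np
from joblib import Parallel, delayed

def run_parr_loops(f_obs, ef_obs, prior_mag, f_mod,
                   gal_mag_type_prior, z_bins, config):
    # split the galaxy indices into one chunk per worker
    n_jobs = config['n_jobs']
    chunks = np.array_split(np.arange(len(f_obs)), n_jobs)
    lsts = [[ind, f_obs[ind], ef_obs[ind], prior_mag[ind], f_mod,
             gal_mag_type_prior, z_bins, config] for ind in chunks]
    # each call returns the per-chunk results dictionary described above
    return Parallel(n_jobs=n_jobs)(delayed(parr_loop)(lst) for lst in lsts)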