Example #1
def _galCorr(cat, scale_factor, outputdir):
    'Helper function that uses the built-in cat object'
    h = 0.7
    RBINS = np.logspace(-1, 1.25, 15)
    redshift = 1.0/scale_factor - 1.0

    print(cat)

    if outputdir[-1] != '/':
        outputdir+='/'

    #Note: Confusing name between cat and halocat. Consider changing.
    halocat = CachedHaloCatalog(simname=cat.simname, halo_finder=cat.halo_finder, version_name=cat.version_name, redshift=redshift)

    model = HodModelFactory(
            centrals_occupation=StepFuncCens(redshift=redshift),
            centrals_profile=TrivialPhaseSpace(redshift=redshift),
            satellites_occupation=StepFuncSats(redshift=redshift),
            satellites_profile=NFWPhaseSpace(redshift=redshift))

    model.populate_mock(halocat, Num_ptcl_requirement = 30)

    #Now, calculate with Halotools builtin
    #TODO include the fast version
    x, y, z = [model.mock.galaxy_table[c] for c in ['x','y','z'] ]
    pos = return_xyz_formatted_array(x,y,z)
    #TODO N procs
    xi_all = tpcf(pos*h, RBINS, period = model.mock.Lbox*h, num_threads =  cpu_count())

    np.savetxt(outputdir + 'xi_all_gal_%.3f_h.npy' %(scale_factor), xi_all)
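A minimal read-back sketch for the file written above, assuming the same RBINS and a unit scale factor (the 'output/' directory is hypothetical); note that np.savetxt writes plain text even though the filename ends in .npy:

import numpy as np

RBINS = np.logspace(-1, 1.25, 15)                      # same 15 bin edges as in _galCorr
rbin_centers = 0.5 * (RBINS[1:] + RBINS[:-1])          # 14 bin centers
xi_all = np.loadtxt('output/xi_all_gal_1.000_h.npy')   # text file despite the .npy suffix
assert xi_all.shape == rbin_centers.shape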
Example #2
def compute_real_tpcf(r, pos, vel, boxsize, num_threads=1):
    '''

	Computes the real space two point correlation function using halotools

	Args:
		r: np.array
			 binning in pair distances.
		pos: np.ndarray
			 3-D array with the position of the tracers.
		vel: np.ndarray
			3-D array with the velocities of the tracers.
		boxsize: float
			size of the simulation's box.
		num_threads: int
			number of threads to use.

	Returns:
		real_tpcf: np.array
			1-D array with the real space tpcf.

	'''

    real_tpcf = tpcf(pos, r, period=boxsize, num_threads=num_threads)

    return real_tpcf
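A brief usage sketch (assumed inputs: uniformly drawn tracer positions in a periodic box; the velocity array is accepted but unused by the function as written):

import numpy as np

boxsize = 250.0
pos = np.random.uniform(0., boxsize, size=(10000, 3))   # tracer positions inside the box
vel = np.zeros_like(pos)                                # not used by compute_real_tpcf
r = np.logspace(-1, 1.3, 15)                            # pair-distance bin edges
xi = compute_real_tpcf(r, pos, vel, boxsize, num_threads=2)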
Example #3
def reference_sim_tpcf(pos1, redges, BoxSize, randoms=None, pos2=None):
    """Reference 1D 2PCF using halotools"""
    from halotools.mock_observables import tpcf

    estimator = 'Natural' if randoms is None else 'Landy-Szalay'
    do_auto = True if pos2 is None else False
    return tpcf(pos1, redges, period=BoxSize, sample2=pos2, randoms=randoms,
                estimator=estimator, do_auto=do_auto)
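A short usage sketch exercising both estimator branches of reference_sim_tpcf (positions and randoms are drawn uniformly here purely for illustration):

import numpy as np

BoxSize = 500.0
redges = np.logspace(0., 1.5, 10)
pos1 = np.random.uniform(0., BoxSize, size=(20000, 3))
randoms = np.random.uniform(0., BoxSize, size=(100000, 3))

xi_natural = reference_sim_tpcf(pos1, redges, BoxSize)              # 'Natural' estimator, analytic randoms
xi_ls = reference_sim_tpcf(pos1, redges, BoxSize, randoms=randoms)  # 'Landy-Szalay' estimator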
Example #5
def popAndCorr(halocat, model, cat, params={}, do_jackknife=True, min_ptcl=MIN_PTCL, rbins=RBINS):
    '''Populate a halo catalog with a model and calculate the galaxy tpcf
    (the 1-halo/2-halo decomposition and projected wp are currently commented out below).'''
    print('Min Num Particles: %d\t%d bins' % (min_ptcl, len(rbins)))
    model.param_dict.update(params)  # insert new params into model
    print(model.param_dict)
    # Note: slow
    model.populate_mock(halocat, Num_ptcl_requirement=min_ptcl)

    # Now, calculate with Halotools builtin
    x, y, z = [model.mock.galaxy_table[c] for c in ['x', 'y', 'z']]
    # mask = model.mock.galaxy_table['halo_mvir'] < 1e15/cat.h
    pos = return_xyz_formatted_array(x, y, z)  # , mask = mask)

    t0 = time()

    # TODO N procs
    if do_jackknife:
        Nrands = 5 
        Nsub = 5 
        randoms = np.random.random(
            (pos.shape[0] * Nrands, 3)) * model.mock.Lbox * cat.h  # Solution to NaNs: use plenty of randoms
        xi_all, xi_cov = tpcf_jackknife(pos * cat.h, randoms, rbins, period=model.mock.Lbox * cat.h,
                                        num_threads=cpu_count(), Nsub=Nsub)
    elif CORRFUNC:
        # write bins to file
        BINDIR = dirname(abspath(__file__))  # location of files with bin edges
        with open(join(BINDIR, './binfile'), 'w') as f:
            for low, high in zip(rbins[:-1], rbins[1:]):
                f.write('\t%f\t%f\n' % (low, high))

        # countpairs requires casting in order to work right.
        xi_all = countpairs_xi(model.mock.Lbox * cat.h, cpu_count(), join(BINDIR, './binfile'),
                               x.astype('float32') * cat.h, y.astype('float32') * cat.h, z.astype('float32') * cat.h)
        xi_all = np.array(xi_all, dtype='float64')[:, 3]

    else:

        xi_all = tpcf(pos * cat.h, rbins, period=model.mock.Lbox * cat.h, num_threads=cpu_count())

    print('Corr Calc Time: %.3f s' % (time() - t0))
    # halo_hostid = model.mock.galaxy_table['halo_id']

    # xi_1h, xi_2h = tpcf_one_two_halo_decomp(pos*cat.h,
    #                                        halo_hostid, rbins,
    #                                        period=model.mock.Lbox*cat.h, num_threads=cpu_count(),
    #                                        max_sample_size=1e7)

    # wp_all = wp(pos*cat.h, RBINS, PI_MAX, period=model.mock.Lbox*cat.h, num_threads = cpu_count())
    rbin_centers = (rbins[1:] + rbins[:-1]) / 2
    output = np.stack([rbin_centers, xi_all])  # , xi_1h, xi_2h])

    if do_jackknife:
        return output, xi_cov
    else:
        return output
Example #6
def compute_tpcf(positions: np.ndarray, boxsize: float = 100.):
    """
    Computes the real space two point correlation function using halotools

    Args:
        positions: 3D array with the cartesian coordinates of the tracers.
        boxsize: box size of the simulation in the same units as positions.

    Returns:
        Bin centers and the real-space tpcf evaluated in those bins.
    """

    if boxsize < 150:
        r = np.geomspace(0.3, 10.0, 7)
    else:
        r = np.geomspace(0.3, 30.0, 18)
    r_c = 0.5 * (r[1:] + r[:-1])

    real_tpcf = tpcf(positions,
                     rbins=r,
                     period=boxsize,
                     estimator="Landy-Szalay")

    return r_c, real_tpcf
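A minimal call sketch for compute_tpcf, assuming tracer positions in a 100 Mpc/h periodic box (uniform positions here are just a placeholder):

import numpy as np

positions = np.random.uniform(0., 100., size=(50000, 3))
r_c, xi = compute_tpcf(positions, boxsize=100.)   # boxsize < 150 selects the finer binning
print(r_c.shape, xi.shape)                        # 6 bin centers, 6 xi values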
Example #7
def build_xi_nbar_gmf(Mr=21):
    '''
    Build "data" xi, nbar, GMF values and write to file
    '''
    model = PrebuiltHodModelFactory('zheng07', threshold = -1.0*float(Mr))
    model.populate_mock(halocat = halocat , enforce_PBC = False) # population mock realization
    pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

    # write xi
    rbins = hardcoded_xi_bins()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cell2_size = approx_cell1_size
    approx_cellran_size = [rmax, rmax, rmax]
    period = np.array([Lbox, Lbox, Lbox])
    # note: `Lbox`, `randoms`, `RR`, and `NR1` are assumed to be precomputed at module scope
    data_xir = tpcf(
            pos, rbins,
            randoms=randoms, period=period,
            max_sample_size=int(1e4), estimator='Landy-Szalay',
            approx_cell1_size=approx_cell1_size,
            approx_cellran_size=approx_cellran_size,
            RR_precomputed=RR,
            NR_precomputed=NR1)
    output_file = ''.join([util.dat_dir(), 'xir.Mr', str(Mr), '.dat'])
    np.savetxt(output_file, data_xir)

    # write nbar values
    nbar = model.mock.number_density
    output_file = ''.join([util.dat_dir(), 'nbar.Mr', str(Mr), '.dat'])
    np.savetxt(output_file, [nbar])

    # write GMF
    rich = richness(model.mock.compute_fof_group_ids())
    gmf = GMF(rich)  # GMF
    output_file = ''.join([util.dat_dir(), 'gmf.Mr', str(Mr), '.dat'])
    np.savetxt(output_file, gmf)

    return None
Example #8
def Subvolume_FullvolumeCut(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 
    2PCF for the entire MultiDark volume WITHOUT periodic boundary conditions
    and actual pair counts, CUT into subvolumes of the same size *AFTER*
    populating the mock


    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample

    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_fullvolume_cut_test',
        '.Nsub',
        str(N_sub), '.p'
    ])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()

    # Entire MultiDark Volume (No Periodic Boundary Conditions)
    model = PrebuiltHodModelFactory('zheng07', threshold=-21)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')

    sub_RR = data_RR(box='md_sub')
    sub_randoms = data_random(box='md_sub')
    sub_NR = len(sub_randoms)

    rmax = xi_bin.max()
    full_approx_cell1_size = [rmax, rmax, rmax]
    full_approx_cellran_size = [rmax, rmax, rmax]

    model.populate_mock(halocat, enforce_PBC=False)
    subvol_id = util.mk_id_column(table=model.mock.galaxy_table)
    full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

    # Full Volume
    if os.path.isfile(pickle_file):
        data_dump = pickle.load(open(pickle_file, 'rb'))
        full_xi = data_dump['full_xi']
    else:
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark',
                                    redshift=0,
                                    halo_finder='rockstar')

        full_randoms = data_random(box='md_all')
        full_RR = data_RR(box='md_all')
        full_NR = len(full_randoms)

        rmax = xi_bin.max()
        full_approx_cell1_size = [rmax, rmax, rmax]
        full_approx_cellran_size = [rmax, rmax, rmax]

        model.populate_mock(halocat, enforce_PBC=False)
        full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        full_xi = tpcf(full_pos,
                       xi_bin,
                       randoms=full_randoms,
                       period=None,
                       do_auto=True,
                       do_cross=False,
                       num_threads=5,
                       max_sample_size=int(full_pos.shape[0]),
                       estimator='Natural',
                       approx_cell1_size=full_approx_cell1_size,
                       approx_cellran_size=full_approx_cellran_size,
                       RR_precomputed=full_RR,
                       NR_precomputed=full_NR)
        data_dump = {}
        data_dump['full_xi'] = full_xi

    if not ratio:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 full_xi,
                 lw=2,
                 ls='-',
                 c='k',
                 label=r'Full Volume')

    if os.path.isfile(pickle_file):
        fullcut_xi_list = data_dump['fullcut_xi']['fullcut_xi_list']
        fullcut_xi_avg = data_dump['fullcut_xi']['fullcut_xi_avg']
    else:
        data_dump['fullcut_xi'] = {}
        fullcut_xi_list = []
        fullcut_xi_tot = np.zeros(len(xi_bin) - 1)
        for id in np.unique(subvol_id)[:N_sub]:
            print('Subvolume ', id)
            in_cut = np.where(subvol_id == id)

            fullcut_pos = full_pos[in_cut]

            fullcut_xi = tpcf(fullcut_pos,
                              xi_bin,
                              randoms=sub_randoms,
                              period=None,
                              do_auto=True,
                              do_cross=False,
                              num_threads=5,
                              max_sample_size=int(fullcut_pos.shape[0]),
                              estimator='Natural',
                              approx_cell1_size=full_approx_cell1_size,
                              approx_cellran_size=full_approx_cellran_size,
                              RR_precomputed=sub_RR,
                              NR_precomputed=sub_NR)

            fullcut_xi_list.append(fullcut_xi)
            fullcut_xi_tot += fullcut_xi

        fullcut_xi_avg = fullcut_xi_tot / float(N_sub)
        data_dump['fullcut_xi']['fullcut_xi_list'] = fullcut_xi_list
        data_dump['fullcut_xi']['fullcut_xi_avg'] = fullcut_xi_avg

    if not ratio:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 fullcut_xi_avg,
                 lw=2,
                 ls='-',
                 c='k',
                 label=r'Full Volume Cut Average')
    else:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 fullcut_xi_avg / full_xi,
                 lw=2,
                 ls='-',
                 c='k',
                 label=r'Full Volume Cut Average')

    if not os.path.isfile(pickle_file):
        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark',
                                        redshift=0,
                                        halo_finder='rockstar')
        sub_RR = data_RR(box='md_sub')
        sub_randoms = data_random(box='md_sub')
        sub_NR = len(sub_randoms)

        sub_xis_list = []
        sub_xis = np.zeros(len(full_xi))

        # note: range(1, N_sub) visits N_sub-1 subvolumes, while the average below divides by N_sub
        for ii in range(1, N_sub):
            print('Subvolume ', ii)
            # randomly sample one of the subvolumes
            rint = ii  #np.random.randint(1, 125)
            simsubvol = lambda x: util.mask_func(x, rint)
            sub_model.populate_mock(sub_halocat,
                                    masking_function=simsubvol,
                                    enforce_PBC=False)

            pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y',
                                       'z')

            xi, yi, zi = util.random_shifter(rint)
            temp_randoms = sub_randoms.copy()
            temp_randoms[:, 0] += xi
            temp_randoms[:, 1] += yi
            temp_randoms[:, 2] += zi

            rmax = xi_bin.max()
            sub_approx_cell1_size = [rmax, rmax, rmax]
            sub_approx_cellran_size = [rmax, rmax, rmax]

            sub_xi = tpcf(pos,
                          xi_bin,
                          randoms=temp_randoms,
                          period=None,
                          do_auto=True,
                          do_cross=False,
                          num_threads=5,
                          max_sample_size=int(pos.shape[0]),
                          estimator='Natural',
                          approx_cell1_size=sub_approx_cell1_size,
                          approx_cellran_size=sub_approx_cellran_size,
                          RR_precomputed=sub_RR,
                          NR_precomputed=sub_NR)

            label = None
            if ii == N_sub - 1:
                label = 'Subvolumes'

            sub_xis += sub_xi
            sub_xis_list.append(sub_xi)

        sub_xi_avg = sub_xis / float(N_sub)

        data_dump['Natural'] = {}
        data_dump['Natural']['sub_xi_avg'] = sub_xi_avg
        data_dump['Natural']['sub_xis_list'] = sub_xis_list
    else:
        sub_xis_list = data_dump['Natural']['sub_xis_list']
        sub_xi_avg = data_dump['Natural']['sub_xi_avg']

    if not os.path.isfile(pickle_file):
        pickle.dump(data_dump, open(pickle_file, 'wb'))

    if not ratio:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 sub_xi_avg,
                 lw=2,
                 ls='--',
                 c=pretty_colors[3],
                 label='Subvolume')
    else:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 sub_xi_avg / full_xi,
                 lw=2,
                 ls='--',
                 c=pretty_colors[3],
                 label='Subvolume')

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub',
            str(N_sub), '.ratio.png'
        ])
    else:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub',
            str(N_sub), '.png'
        ])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None
Example #9
def build_MCMC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build covariance matrix used in MCMC for the full nbar, xi, gmf data vector
    using realisations of galaxy mocks for "data" HOD parameters in the 
    halos from the other subvolumes (subvolume 1 to subvolume 125) of
    the simulation. Covariance matrices for different sets of observables
    can be extracted from the full covariance matrix by slicing through 
    the indices. 

    '''
    nbars = []
    xir = []
    gmfs = []

    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')
    ###model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}

    #some settings for tpcf calculations
    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cellran_size = [rmax, rmax, rmax]

    #load randoms and RRs

    randoms = data_random(box='md_sub')
    RR = data_RR(box='md_sub')
    NR = len(randoms)

    for i in range(1, 125):
        print('mock#', i)

        # populate the mock subvolume
        ###mocksubvol = lambda x: util.mask_func(x, i)
        ###model.populate_mock(halocat,
        ###                    masking_function=mocksubvol,
        ###                    enforce_PBC=False)
        model.populate_mock(halocat)
        # returning the positions of galaxies in the entire volume
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        # masking out the galaxies outside the subvolume i
        pos = util.mask_galaxy_table(pos, i)
        # calculate nbar
        print("shape of pos", pos.shape)
        nbars.append(len(pos) / 200**3.)
        # translate the positions of randoms to the new subbox
        xi0, yi0, zi0 = util.random_shifter(i)
        temp_randoms = randoms.copy()
        temp_randoms[:, 0] += xi0
        temp_randoms[:, 1] += yi0
        temp_randoms[:, 2] += zi0
        #calculate xi(r)
        xi = tpcf(pos,
                  rbins,
                  pos,
                  randoms=temp_randoms,
                  period=None,
                  max_sample_size=int(3e5),
                  estimator='Natural',
                  approx_cell1_size=approx_cell1_size,
                  approx_cellran_size=approx_cellran_size,
                  RR_precomputed=RR,
                  NR_precomputed=NR)
        xir.append(xi)
        # calculate gmf

        nbar = len(pos) / 200**3.
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gbins = gmf_bins()
        gmf = np.histogram(w, gbins)[0] / 200.**3.
        gmfs.append(gmf)

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.obvs_dir(), 'nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # write full covariance matrix of various combinations of the data
    # and invert for the likelihood evaluations

    # --- covariance for all three ---
    fulldatarr = np.hstack(
        (np.array(nbars).reshape(len(nbars),
                                 1), np.array(xir), np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)
    fullcorr = np.corrcoef(fulldatarr.T)

    # and save the covariance matrix
    nopoisson_file = ''.join([
        util.obvs_dir(), 'MCMC.nbar_xi_gmf_cov', '.no_poisson', '.Mr',
        str(Mr), '.bnorm',
        str(round(b_normal, 2)), '.dat'
    ])
    np.savetxt(nopoisson_file, fullcov)
    return None
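The docstring notes that sub-covariances can be sliced out of the full matrix; a minimal sketch of pulling out the xi-only block, based on the stacking order nbar | xi | gmf used above (the path is just the one written by the function):

n_xi = len(xi_binedges()) - 1                  # number of xi bins in the data vector
fullcov = np.loadtxt(nopoisson_file)           # covariance saved by the function above
xi_cov = fullcov[1:1 + n_xi, 1:1 + n_xi]       # rows/cols ordered as [nbar | xi | gmf]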
Example #11
    def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
        '''
        Given theta, sum_stat calculates the observables from our forward model

        Parameters
        ----------
        theta : (self explanatory)
        prior_range : If specified, checks to make sure that theta is within the prior range.
        '''
        self.model.param_dict['logM0'] = theta[0]
        self.model.param_dict['sigma_logM'] = np.exp(theta[1])
        self.model.param_dict['logMmin'] = theta[2]
        self.model.param_dict['alpha'] = theta[3]
        self.model.param_dict['logM1'] = theta[4]

        rbins = xi_binedges()
        rmax = rbins.max()
        approx_cell1_size = [rmax, rmax, rmax]
        approx_cellran_size = [rmax, rmax, rmax]

        if prior_range is None:

            self.model.populate_mock(self.halocat, enforce_PBC=False)
            pos = three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y',
                                       'z')
            obvs = []

            for obv in observables:
                if obv == 'nbar':
                    obvs.append(len(pos) /
                                1000.**3.)  # nbar of the galaxy catalog
                elif obv == 'gmf':
                    nbar = len(pos) / 1000**3.
                    b = self.b_normal * (nbar)**(-1. / 3)
                    groups = pyfof.friends_of_friends(pos, b)
                    w = np.array([len(x) for x in groups])
                    gbins = data_gmf_bins()
                    gmf = np.histogram(w, gbins)[0] / (1000.**3.)
                    obvs.append(gmf)
                elif obv == 'xi':
                    # note: `randoms` is not defined in this method; it is presumably
                    # self.randoms or a module-level array
                    greek_xi = tpcf(pos,
                                    rbins,
                                    pos,
                                    randoms=randoms,
                                    period=None,
                                    max_sample_size=int(3e5),
                                    estimator='Natural',
                                    approx_cell1_size=approx_cell1_size,
                                    approx_cellran_size=approx_cellran_size,
                                    RR_precomputed=self.RR,
                                    NR_precomputed=self.NR)
                    obvs.append(greek_xi)
                else:
                    raise NotImplementedError(
                        'Only nbar 2pcf, gmf implemented so far')

            return obvs

        else:
            if np.all((prior_range[:, 0] < theta)
                      & (theta < prior_range[:, 1])):
                # if all theta_i is within prior range ...
                try:

                    self.model.populate_mock(self.halocat)
                    pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                               'x', 'y', 'z')
                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(len(pos) /
                                        1000**3.)  # nbar of the galaxy catalog
                        elif obv == 'gmf':
                            nbar = len(pos) / 1000**3.
                            b = self.b_normal * (nbar)**(-1. / 3)
                            groups = pyfof.friends_of_friends(pos, b)
                            w = np.array([len(x) for x in groups])
                            gbins = data_gmf_bins()
                            gmf = np.histogram(w, gbins)[0] / (1000.**3.)
                            obvs.append(gmf)
                        elif obv == 'xi':
                            greek_xi = tpcf(
                                pos,
                                rbins,
                                pos,
                                randoms=randoms,
                                period=None,
                                max_sample_size=int(3e5),
                                estimator='Natural',
                                approx_cell1_size=approx_cell1_size,
                                approx_cellran_size=approx_cellran_size,
                                RR_precomputed=self.RR,
                                NR_precomputed=self.NR)
                            obvs.append(greek_xi)
                        else:
                            raise NotImplementedError(
                                'Only nbar, tpcf, and gmf are implemented so far'
                            )

                    return obvs

                except ValueError:

                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(10.)
                        elif obv == 'gmf':
                            bins = data_gmf_bins()
                            obvs.append(np.ones_like(bins)[:-1] * 1000.)
                        elif obv == 'xi':
                            obvs.append(np.zeros(len(xi_binedges()[:-1])))
                    return obvs
            else:
                obvs = []
                for obv in observables:
                    if obv == 'nbar':
                        obvs.append(10.)
                    elif obv == 'gmf':
                        bins = data_gmf_bins()
                        obvs.append(np.ones_like(bins)[:-1] * 1000.)
                    elif obv == 'xi':
                        obvs.append(np.zeros(len(xi_binedges()[:-1])))
                return obvs
Example #12
import numpy as np
from halotools.mock_observables import tpcf, s_mu_tpcf, tpcf_multipole
import matplotlib.pyplot as plt

h = 0.6726
b = 2.23
pos = np.fromfile('sample_input_gal_mx3_float32_0-1100_mpc.dat',
                  dtype=np.float32).reshape(-1, 4)[:, 1:4]*h
s_bins, mu_bins = np.arange(5, 155, 5), np.arange(0, 1.05, 0.05)
r = (s_bins[:-1] + s_bins[1:])/2
xi = tpcf(pos, s_bins, period=1100)
xi_s_mu = s_mu_tpcf(pos, s_bins, mu_bins, period=1100)
xi_0 = tpcf_multipole(xi_s_mu, mu_bins, order=0)
xi_2 = tpcf_multipole(xi_s_mu, mu_bins, order=2)
fig, ax = plt.subplots()
ax.plot(r, xi*np.square(r), 'o:', label='xi')
ax.plot(r, xi_0*np.square(r), '+:', label='xi_0')
ax.plot(r, xi_2*np.square(r), 'x-.', label='xi_2')
fig.legend()
fig.savefig('correlation.pdf')
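For reference, tpcf_multipole can be cross-checked against a direct Legendre sum over the mu bins; a minimal sketch, assuming xi_s_mu has shape (len(s_bins)-1, len(mu_bins)-1) with mu spanning [0, 1]:

from scipy.special import eval_legendre

mu_mid = 0.5 * (mu_bins[1:] + mu_bins[:-1])
dmu = np.diff(mu_bins)
order = 2
# xi_l(s) = (2l+1) * integral_0^1 xi(s, mu) P_l(mu) dmu, using symmetry in mu
xi_2_manual = (2 * order + 1) * np.sum(xi_s_mu * eval_legendre(order, mu_mid) * dmu, axis=1)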
Example #13
    def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
        '''
        Given theta, sum_stat calculates the observables from our forward model

        Parameters
        ----------
        theta : (self explanatory)
        prior_range : If specified, checks to make sure that theta is within the prior range.
        '''
        self.model.param_dict['logM0'] = theta[0]
        self.model.param_dict['sigma_logM'] = np.exp(theta[1])
        self.model.param_dict['logMmin'] = theta[2]
        self.model.param_dict['alpha'] = theta[3]
        self.model.param_dict['logM1'] = theta[4]

        rbins = xi_binedges()
        rmax = rbins.max()
        approx_cell1_size = [rmax , rmax , rmax]
        approx_cellran_size = [rmax , rmax , rmax]

        if prior_range is None:
            
            self.model.populate_mock(self.halocat)
            pos =three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z')
            obvs = []

            for obv in observables:
                if obv == 'nbar':
                    obvs.append(len(pos) / 1000.**3.)       # nbar of the galaxy catalog
                elif obv == 'gmf':
                    nbar = len(pos) / 1000**3.
                    b = self.b_normal * (nbar)**(-1./3)
                    groups = pyfof.friends_of_friends(pos, b)
                    w = np.array([len(x) for x in groups])
                    gbins = data_gmf_bins()
                    gmf = np.histogram(w, gbins)[0] / (1000.**3.)
                    obvs.append(gmf)
                elif obv == 'xi':
                    greek_xi = tpcf(
                            pos, rbins,  
                            period=self.model.mock.Lbox, 
                            max_sample_size=int(3e5), estimator='Natural', 
                            approx_cell1_size=approx_cell1_size)
                    obvs.append(greek_xi)
                else:
                    raise NotImplementedError('Only nbar 2pcf, gmf implemented so far')

            return obvs

        else:
            if np.all((prior_range[:,0] < theta) & (theta < prior_range[:,1])):
                # if all theta_i is within prior range ...
                try:


                    self.model.populate_mock(self.halocat)
                    pos = three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z')
                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(len(pos) / 1000**3.)       # nbar of the galaxy catalog
                        elif obv == 'gmf':
                            nbar = len(pos) / 1000**3.
                            b = self.b_normal * (nbar)**(-1./3)
                            groups = pyfof.friends_of_friends(pos, b)
                            w = np.array([len(x) for x in groups])
                            gbins = data_gmf_bins()
                            gmf = np.histogram(w, gbins)[0] / (1000.**3.)
                            obvs.append(gmf)
                        elif obv == 'xi':
                            greek_xi = tpcf(
                                    pos, rbins, period=self.model.mock.Lbox, 
                                    max_sample_size=int(3e5), estimator='Natural', 
                                    approx_cell1_size=approx_cell1_size)
                            obvs.append(greek_xi)
                        else:
                            raise NotImplementedError('Only nbar, tpcf, and gmf are implemented so far')

                    return obvs

                except ValueError:

                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(10.)
                        elif obv == 'gmf':
                            bins = data_gmf_bins()
                            obvs.append(np.ones_like(bins)[:-1]*1000.)
                        elif obv == 'xi':
                            obvs.append(np.zeros(len(xi_binedges()[:-1])))
                    return obvs
            else:
                obvs = []
                for obv in observables:
                    if obv == 'nbar':
                        obvs.append(10.)
                    elif obv == 'gmf':
                        bins = data_gmf_bins()
                        obvs.append(np.ones_like(bins)[:-1]*1000.)
                    elif obv == 'xi':
                        obvs.append(np.zeros(len(xi_binedges()[:-1])))
                return obvs
Example #14
    def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
        '''
        Given theta, sum_stat calculates the observables from our forward model

        Parameters
        ----------
        theta : (self explanatory)
        prior_range : If specified, checks to make sure that theta is within the prior range.
        '''
        self.model.param_dict['logM0'] = theta[0]
        self.model.param_dict['sigma_logM'] = np.exp(theta[1])
        self.model.param_dict['logMmin'] = theta[2]
        self.model.param_dict['alpha'] = theta[3]
        self.model.param_dict['logM1'] = theta[4]

        rbins = xi_binedges()
        rmax = rbins.max()
        period = None
        approx_cell1_size = [rmax , rmax , rmax]
        approx_cellran_size = [rmax , rmax , rmax]

        if prior_range is None:
            rint = np.random.randint(1, 125)
            ####simsubvol = lambda x: util.mask_func(x, rint)
            ####self.model.populate_mock(self.halocat,
            ####                masking_function=simsubvol,
            ####                enforce_PBC=False)
            self.model.populate_mock(self.halocat)
                        
            pos =three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z')
            pos = util.mask_galaxy_table(pos , rint) 

            xi , yi , zi = util.random_shifter(rint)
            temp_randoms = self.randoms.copy()
            temp_randoms[:,0] += xi
            temp_randoms[:,1] += yi
            temp_randoms[:,2] += zi

            obvs = []
            for obv in observables:
                if obv == 'nbar':
                    obvs.append(len(pos) / 200**3.)       # nbar of the galaxy catalog
                elif obv == 'gmf':
                    # compute group richness
                    nbar = len(pos) / 200**3.
                    b = self.b_normal * (nbar)**(-1./3)
                    groups = pyfof.friends_of_friends(pos, b)
                    w = np.array([len(x) for x in groups])
                    gbins = data_gmf_bins()
                    gmf = np.histogram(w, gbins)[0] / (200.**3.)
                    obvs.append(gmf)
                elif obv == 'xi':
                    greek_xi = tpcf(
                        pos, rbins, pos,
                        randoms=temp_randoms, period=period,
                        max_sample_size=int(1e5), estimator='Natural',
                        approx_cell1_size=approx_cell1_size,
                        approx_cellran_size=approx_cellran_size,
                        RR_precomputed=self.RR,
                        NR_precomputed=self.NR)

                    obvs.append(greek_xi)
                else:
                    raise NotImplementedError('Only nbar 2pcf, gmf implemented so far')

            return obvs

        else:
            if np.all((prior_range[:,0] < theta) & (theta < prior_range[:,1])):
                # if all theta_i is within prior range ...
                try:
                    rint = np.random.randint(1, 125)
                    simsubvol = lambda x: util.mask_func(x, rint)
                    self.model.populate_mock(self.halocat,
                                masking_function=simsubvol,
                                enforce_PBC=False)
           
                    pos = three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z')
                    # imposing mask on the galaxy table
                    pos = util.mask_galaxy_table(pos, rint)
                    xi, yi, zi = util.random_shifter(rint)
                    temp_randoms = self.randoms.copy()
                    temp_randoms[:,0] += xi
                    temp_randoms[:,1] += yi
                    temp_randoms[:,2] += zi
                    obvs = []

                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(len(pos) / 200**3.)       # nbar of the galaxy catalog
                        elif obv == 'gmf':
                            nbar = len(pos) / 200**3.
                            b = self.b_normal * (nbar)**(-1./3)
                            groups = pyfof.friends_of_friends(pos, b)
                            w = np.array([len(x) for x in groups])
                            gbins = data_gmf_bins()
                            gmf = np.histogram(w, gbins)[0] / (200.**3.)
                            obvs.append(gmf)
                        elif obv == 'xi':
                            greek_xi = tpcf(
                                pos, rbins, pos,
                                randoms=temp_randoms, period=period,
                                max_sample_size=int(1e5), estimator='Natural',
                                approx_cell1_size=approx_cell1_size,
                                approx_cellran_size=approx_cellran_size,
                                RR_precomputed=self.RR,
                                NR_precomputed=self.NR)

                            obvs.append(greek_xi)
                        else:
                            raise NotImplementedError('Only nbar, tpcf, and gmf are implemented so far')

                    return obvs

                except ValueError:

                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(10.)
                        elif obv == 'gmf':
                            bins = data_gmf_bins()
                            obvs.append(np.ones_like(bins)[:-1]*1000.)
                        elif obv == 'xi':
                            obvs.append(np.zeros(len(xi_binedges()[:-1])))
                    return obvs
            else:
                obvs = []
                for obv in observables:
                    if obv == 'nbar':
                        obvs.append(10.)
                    elif obv == 'gmf':
                        bins = data_gmf_bins()
                        obvs.append(np.ones_like(bins)[:-1]*1000.)
                    elif obv == 'xi':
                        obvs.append(np.zeros(len(xi_binedges()[:-1])))
                return obvs
Example #15
    np.median(mstar) / 1.e8, (mstar.max() - mstar.min()) / 1.e10))
plt.xscale('log')
plt.yscale('log')
plt.savefig('mstar_dist' + nn + '.pdf', bbox_inches='tight')
# import sys; sys.exit()

# ACF
galpos = np.array([g.pos.d for g in sim.galaxies]) / 1.e3  # cMpc/h
#  the real space radial bins in which pairs are counted
rsize = 15
rbins = np.logspace(-1, 1.25, rsize)  # cMpc/h
rbin_centers = (rbins[1:] + rbins[:-1]) / 2.
# real space TPCF
xi_all = tpcf(galpos,
              rbins,
              period=boxsize,
              num_threads='max',
              estimator='Landy-Szalay')

plt.figure()
plt.plot(rbin_centers, xi_all, label='All galaxies', color='k', marker='o')
plt.loglog()
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.xlabel(r'$r $  $\rm{[Mpc h^{-1}]}$', fontsize=25)
plt.ylabel(r'$\xi(r)$', fontsize=25)
plt.legend(loc='best', fontsize=20)
plt.savefig('realspace_xi_' + nn + '.pdf', bbox_inches='tight')

# fit power law to correlation
from scipy.optimize import curve_fit
Example #16
def build_nbar_xi_gmf_cov(Mr=21):
    ''' Build covariance matrix for the full nbar, xi, gmf data vector
    using realisations of galaxy mocks for "data" HOD parameters in the 
    halos from the multidark simulation. Covariance matrices for different sets of observables
    can be extracted from the full covariance matrix by slicing through 
    the indices. 

    '''
    nbars = []
    xir = []
    gmfs = []

    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')
    #some settings for tpcf calculations
    rbins = hardcoded_xi_bins()

    for i in range(1, 125):
        print('mock#', i)

        # populate the mock subvolume
        model.populate_mock(halocat)
        # returning the positions of galaxies
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        # calculate nbar
        nbars.append(len(pos) / 1000**3.)
        # translate the positions of randoms to the new subbox
        #calculate xi(r)
        xi = tpcf(pos,
                  rbins,
                  period=model.mock.Lbox,
                  max_sample_size=int(2e5),
                  estimator='Landy-Szalay')
        xir.append(xi)
        # calculate gmf
        nbar = len(pos) / 1000**3.
        b_normal = 0.75
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gbins = gmf_bins()
        gmf = np.histogram(w, gbins)[0] / (1000.**3.)
        gmfs.append(gmf)  # GMF
    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join(
        [util.multidat_dir(), 'abc_nbar_var.Mr',
         str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # write full covariance matrix of various combinations of the data
    # and invert for the likelihood evaluations

    # --- covariance for all three ---
    fulldatarr = np.hstack(
        (np.array(nbars).reshape(len(nbars),
                                 1), np.array(xir), np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)
    fullcorr = np.corrcoef(fulldatarr.T)
    # and save the covariance matrix
    nopoisson_file = ''.join([
        util.multidat_dir(), 'abc_nbar_xi_gmf_cov.no_poisson.Mr',
        str(Mr), '.dat'
    ])
    np.savetxt(nopoisson_file, fullcov)

    # and a correlation matrix
    full_corr_file = ''.join(
        [util.multidat_dir(), 'abc_nbar_xi_gmf_corr.Mr',
         str(Mr), '.dat'])
    np.savetxt(full_corr_file, fullcorr)

    return None
Example #17
def main():

    # get simulation information
    if len(sys.argv)>1:
        sim_name = sys.argv[1]
        snapnum = int(sys.argv[2])
        shape_type = sys.argv[3]
        sample_name = sys.argv[4]
    else:
        sim_name = 'TNG300-1' # full physics high-res run
        snapnum = 99  # z=0
        shape_type = 'reduced'  # non-reduced, reduced, iterative
        sample_name = 'sample_3'

    # load a test halo catalog
    from halotools.sim_manager import CachedHaloCatalog
    halocat = CachedHaloCatalog(simname='bolplanck', halo_finder='rockstar',
                                redshift=0.0, dz_tol=0.1, version_name='halotools_v0p4')

    from halotools.empirical_models import HodModelFactory

    # define the central occupation model
    from halotools.empirical_models import TrivialPhaseSpace, Zheng07Cens
    cens_occ_model =  Zheng07Cens()
    cens_prof_model = TrivialPhaseSpace()

    # define the satellite occupation model
    from halotools.empirical_models import Zheng07Sats
    from halotools.empirical_models import NFWPhaseSpace, SubhaloPhaseSpace
    from intrinsic_alignments.ia_models.anisotropic_nfw_phase_space import AnisotropicNFWPhaseSpace
    sats_occ_model =  Zheng07Sats()
    #sats_prof_model = AnisotropicNFWPhaseSpace()
    sats_prof_model = SubhaloPhaseSpace('satellites', np.logspace(10.5, 15.2, 15))

    # define the alignment models
    from intrinsic_alignments.ia_models.ia_model_components import CentralAlignment,\
        RadialSatelliteAlignment,  MajorAxisSatelliteAlignment, HybridSatelliteAlignment
    central_orientation_model = CentralAlignment()
    satellite_orientation_model = RadialSatelliteAlignment()

    if sample_name == 'sample_1':
        cens_occ_model.param_dict['logMmin'] = 12.54
        cens_occ_model.param_dict['sigma_logM'] = 0.26

        sats_occ_model.param_dict['alpha'] = 1.0
        sats_occ_model.param_dict['logM0'] = 12.68
        sats_occ_model.param_dict['logM1'] = 13.48

        central_orientation_model.param_dict['central_alignment_strength'] = 0.755
        satellite_orientation_model.param_dict['satellite_alignment_strength'] = 0.279
    elif sample_name == 'sample_2':
        cens_occ_model.param_dict['logMmin'] = 11.93
        cens_occ_model.param_dict['sigma_logM'] = 0.26

        sats_occ_model.param_dict['alpha'] = 1.0
        sats_occ_model.param_dict['logM0'] = 12.05
        sats_occ_model.param_dict['logM1'] = 12.85

        central_orientation_model.param_dict['central_alignment_strength'] = 0.64
        satellite_orientation_model.param_dict['satellite_alignment_strength'] = 0.084
    elif sample_name =='sample_3':
        cens_occ_model.param_dict['logMmin'] = 11.61
        cens_occ_model.param_dict['sigma_logM'] = 0.26

        sats_occ_model.param_dict['alpha'] = 1.0
        sats_occ_model.param_dict['logM0'] = 11.8
        sats_occ_model.param_dict['logM1'] = 12.6

        central_orientation_model.param_dict['central_alignment_strength'] = 0.57172919
        satellite_orientation_model.param_dict['satellite_alignment_strength'] = 0.01995

    # combine model components
    model_instance = HodModelFactory(centrals_occupation = cens_occ_model,
                                 centrals_profile = cens_prof_model,
                                 satellites_occupation = sats_occ_model,
                                 satellites_profile = sats_prof_model,
                                 centrals_orientation = central_orientation_model,
                                 satellites_orientation = satellite_orientation_model,
                                 model_feature_calling_sequence = (
                                 'centrals_occupation',
                                 'centrals_profile',
                                 'satellites_occupation',
                                 'satellites_profile',
                                 'centrals_orientation',
                                 'satellites_orientation')
                                )
    
    # populate mock catalog
    model_instance.populate_mock(halocat)
    print("number of galaxies: ", len(model_instance.mock.galaxy_table))

    mock = model_instance.mock.galaxy_table

    # galaxy coordinates and orientations
    coords = np.vstack((mock['x'],
                        mock['y'],
                        mock['z'])).T

    orientations = np.vstack((mock['galaxy_axisA_x'],
                              mock['galaxy_axisA_y'],
                              mock['galaxy_axisA_z'])).T

    from halotools.mock_observables import tpcf, tpcf_jackknife

    rbins = np.logspace(-1,1.5,15)
    rbin_centers = (rbins[:-1]+rbins[1:])/2.0

    xi = tpcf(coords, rbins, period=halocat.Lbox)
    err=np.zeros(len(xi))

    # save measurements
    fpath = PROJECT_DIRECTORY + 'modelling_illustris/data/'
    fname = sim_name + '_' + str(snapnum) + '-' + sample_name +'_model_xi.dat'
    ascii.write([rbin_centers, xi, err],
                 fpath+fname,
                 names=['r','xi','err'],
                 overwrite=True)
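The script above imports tpcf_jackknife but leaves err as zeros; a minimal sketch of filling it with jackknife error bars, mirroring the call pattern of Example #5 (the number of randoms and Nsub are illustrative choices):

    # uniform randoms filling the box; Nsub=3 splits each axis into 3 sub-boxes
    randoms = np.random.random((len(coords) * 5, 3)) * halocat.Lbox
    xi_jack, xi_cov = tpcf_jackknife(coords, randoms, rbins,
                                     period=halocat.Lbox, Nsub=3)
    err = np.sqrt(np.diag(xi_cov))   # 1-sigma errors from the jackknife covariance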
Example #18
def build_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build data vector [nbar, xi, gmf] and save to file 
    This data vector is built from the zeroth slice of the MultiDark simulation.
    The other slices will be used for building the covariance matrix.

    Parameters
    ----------
    Mr : (int) 
        Absolute magnitude cut off M_r. Default M_r = -21.

    b_normal : (float) 
        FoF Linking length
    '''
    thr = -1. * float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
    ####model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}

    ####datsubvol = lambda x: util.mask_func(x, 0)
    ####model.populate_mock(halocat, masking_function=datsubvol, enforce_PBC=False)
    model.populate_mock(halocat)
    
    #all the things necessary for tpcf calculation
    pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    #masking the galaxies outside the subvolume 0
    pos = util.mask_galaxy_table(pos , 0)
    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax , rmax , rmax]
    approx_cellran_size = [rmax , rmax , rmax]

    #compute number density    
    nbar = len(pos) / 200**3.
    
    # load MD subvolume randoms and RRs
    randoms = data_random(box='md_sub')
    RR = data_RR(box='md_sub')
    NR = len(randoms)
 
    #compue tpcf with Natural estimator
    data_xir = tpcf(
                 pos, rbins, pos, 
                 randoms=randoms, period=None, 
                 max_sample_size=int(2e5), estimator='Natural', 
                 approx_cell1_size=approx_cell1_size, 
                 approx_cellran_size=approx_cellran_size, 
                 RR_precomputed=RR, 
                 NR_precomputed=NR)

    fullvec = np.append(nbar, data_xir)
    
    #compute gmf
    b = b_normal * (nbar)**(-1./3) 
    groups = pyfof.friends_of_friends(pos , b)
    w = np.array([len(x) for x in groups])
    gbins = gmf_bins()
    gmf = np.histogram(w , gbins)[0] / (200.**3.)
    fullvec = np.append(fullvec, gmf)

    output_file = data_file(Mr=Mr, b_normal=b_normal)
    np.savetxt(output_file, fullvec)
    return None
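A short sketch of unpacking the saved data vector back into its nbar, xi, and gmf pieces, using the same stacking order as above:

fullvec = np.loadtxt(output_file)     # data vector written by build_nbar_xi_gmf
n_xi = len(xi_binedges()) - 1
nbar, data_xir, gmf = fullvec[0], fullvec[1:1 + n_xi], fullvec[1 + n_xi:]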
Example #19
def Subvolume_Analytic(N_sub, ratio=False): 
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 
    analytic 2PCF for the entire MultiDark volume

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample

    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/', 
        'xi_subvolume_test', 
        '.Nsub', str(N_sub), 
        '.p'])
    
    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges() 
    
    if os.path.isfile(pickle_file):
        data_dump = pickle.load(open(pickle_file, 'rb'))
        full_xi = data_dump['full_xi']
    else: 
        # Entire MultiDark Volume (Analytic xi) 
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
        
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        
        # while the estimator claims to be Landy-Szalay, I highly suspect it
        # does not actually use Landy-Szalay, since DR pairs cannot be calculated
        # from analytic randoms
        full_xi = tpcf(pos, xi_bin, period=model.mock.Lbox, max_sample_size=int(2e5), estimator='Landy-Szalay', num_threads=1)
        data_dump = {} 
        data_dump['full_xi'] = full_xi

    if not ratio:  
        sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), full_xi, lw=2, ls='-', c='k', label=r'Analytic $\xi$ Entire Volume') 
    
    if not os.path.isfile(pickle_file):
        # MultiDark SubVolume (precomputed RR pairs) 
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
        RR = data_RR()
        randoms = data_random()
        NR = len(randoms)
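        # NOTE: because every subvolume uses the same set of randoms, merely
        # translated by util.random_shifter, the random-random pair separations
        # (and hence RR and NR) are identical for all subvolumes. That is why a
        # single precomputed RR can be passed to tpcf via RR_precomputed /
        # NR_precomputed inside the loop below.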
    
    for method in ['Landy-Szalay', 'Natural']:
        
        if method == 'Landy-Szalay': 
            iii = 3
        elif method == 'Natural': 
            iii = 5
        
        if not os.path.isfile(pickle_file): 
            sub_xis_list = [] 
            sub_xis = np.zeros(len(full_xi)) 

            for ii in range(1,N_sub+1): 
                # randomly sample one of the subvolumes
                rint = ii #np.random.randint(1, 125)
                simsubvol = lambda x: util.mask_func(x, rint)
                sub_model.populate_mock(sub_halocat, masking_function=simsubvol, enforce_PBC=False)
                   
                pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y', 'z')

                xi, yi , zi = util.random_shifter(rint)
                temp_randoms = randoms.copy()
                temp_randoms[:,0] += xi
                temp_randoms[:,1] += yi
                temp_randoms[:,2] += zi
                
                rmax = xi_bin.max()
                approx_cell1_size = [rmax , rmax , rmax]
                approx_cellran_size = [rmax , rmax , rmax]

                sub_xi = tpcf(
                        pos, xi_bin, pos, 
                        randoms=temp_randoms, 
                        period = None, 
                        max_sample_size=int(1e5), 
                        estimator=method, 
                        approx_cell1_size = approx_cell1_size, 
                        approx_cellran_size = approx_cellran_size,
                        RR_precomputed=RR,
                        NR_precomputed=NR)
                label = None
                if ii == N_sub:  # label only the last subvolume
                    label = 'Subvolumes'
                
                #if not ratio: 
                #    sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi, lw=0.5, ls='--', c=pretty_colors[iii])
                sub_xis += sub_xi
                sub_xis_list.append(sub_xi)

            sub_xi_avg = sub_xis/np.float(N_sub)

            data_dump[method] = {} 
            data_dump[method]['sub_xi_avg'] = sub_xi_avg
            data_dump[method]['sub_xis_list'] = sub_xis_list 
        else: 
            sub_xis_list = data_dump[method]['sub_xis_list']
            sub_xi_avg = data_dump[method]['sub_xi_avg'] 

        if not ratio: 
            sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg, 
                    lw=2, ls='--', c=pretty_colors[iii], label='Subvolume '+method)
        else: 
            sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg/full_xi, 
                    lw=2, ls='--', c=pretty_colors[iii], label='Subvolume '+method)
    
    if not os.path.isfile(pickle_file): 
        pickle.dump(data_dump, open(pickle_file, 'wb')) 

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')
    
    if not ratio: 
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else: 
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$", fontsize=25)

    sub.legend(loc='lower left')
    
    if ratio: 
        fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_analytic.Nsub', str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_analytic.Nsub', str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None 
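# Hypothetical stand-in (an assumption, not the original util.random_shifter) to
# illustrate what the shifting step above does: map a subvolume index of the
# 1000 Mpc/h MultiDark box, split into 5x5x5 subboxes of 200 Mpc/h, to the
# (x, y, z) offset of that subbox, so the subvolume-0 randoms can be translated
# onto it. The real helper may order the subvolumes differently.
def random_shifter_sketch(index, n_per_side=5, subbox_size=200.):
    ix = index // (n_per_side * n_per_side)
    iy = (index // n_per_side) % n_per_side
    iz = index % n_per_side
    return ix * subbox_size, iy * subbox_size, iz * subbox_size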
Beispiel #20
0
def build_MCMC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build the covariance matrix used in the MCMC for the full [nbar, xi, gmf]
    data vector, using realisations of galaxy mocks generated with the "data" HOD
    parameters in halos from the other subvolumes (subvolumes 1 to 124) of
    the simulation. Covariance matrices for different subsets of observables
    can be extracted from the full covariance matrix by slicing through 
    the corresponding indices. 

    '''
    nbars = []
    xir = []
    gmfs = []

    thr = -1. * np.float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
    ###model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
    
    #some settings for tpcf calculations
    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax , rmax , rmax]
    approx_cellran_size = [rmax , rmax , rmax]
    
    #load randoms and RRs
    
    randoms = data_random(box='md_sub')
    RR = data_RR(box='md_sub')
    NR = len(randoms)

    for i in xrange(1,125):
        print 'mock#', i

        # populate the mock subvolume
        ###mocksubvol = lambda x: util.mask_func(x, i)
        ###model.populate_mock(halocat,
        ###                    masking_function=mocksubvol,
        ###                    enforce_PBC=False)
        model.populate_mock(halocat)
        # returning the positions of galaxies in the entire volume
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        # masking out the galaxies outside the subvolume i
        pos = util.mask_galaxy_table(pos , i)
        # calculate nbar
        print "shape of pos" , pos.shape 
        nbars.append(len(pos) / 200**3.)
        # translate the positions of randoms to the new subbox
        xi0 , yi0 , zi0 = util.random_shifter(i)
        temp_randoms = randoms.copy()
        temp_randoms[:,0] += xi0
        temp_randoms[:,1] += yi0
        temp_randoms[:,2] += zi0
        # calculate xi(r)
        xi = tpcf(
             pos, rbins, pos, 
             randoms=temp_randoms, period=None, 
             max_sample_size=int(3e5), estimator='Natural', 
             approx_cell1_size=approx_cell1_size, 
             approx_cellran_size=approx_cellran_size,
             RR_precomputed=RR,
             NR_precomputed=NR)
        xir.append(xi)
        # calculate gmf

        nbar = len(pos) / 200**3.
        b = b_normal * (nbar)**(-1./3) 
        groups = pyfof.friends_of_friends(pos , b)
        w = np.array([len(x) for x in groups])
        gbins = gmf_bins()
        gmf = np.histogram(w, gbins)[0] / 200.**3.
        gmfs.append(gmf)

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.obvs_dir(), 'nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # write full covariance matrix of various combinations of the data
    # and invert for the likelihood evaluations

    # --- covariance for all three ---
    fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1),
                            np.array(xir),
                            np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)
    fullcorr = np.corrcoef(fulldatarr.T)

    # and save the covariance matrix
    nopoisson_file = ''.join([util.obvs_dir(),
        'MCMC.nbar_xi_gmf_cov', '.no_poisson', '.Mr', str(Mr), '.bnorm', str(round(b_normal,2)), '.dat'])
    np.savetxt(nopoisson_file, fullcov)
    return None
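# Minimal sketch (not from the original module) of the index slicing mentioned in
# the docstring above: the saved covariance follows the [nbar, xi(r), gmf] ordering
# of the data vector, so per-observable blocks can be pulled out like this.
def slice_cov_blocks(fullcov, n_xi, n_gmf):
    var_nbar = fullcov[0, 0]                                # nbar variance
    cov_xi = fullcov[1:1 + n_xi, 1:1 + n_xi]                # xi(r) block
    cov_gmf = fullcov[1 + n_xi:1 + n_xi + n_gmf,
                      1 + n_xi:1 + n_xi + n_gmf]            # gmf block
    return var_nbar, cov_xi, cov_gmf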
Beispiel #21
0
def build_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build the data vector [nbar, xi, gmf] and save it to file.
    This data vector is built from the zeroth subvolume of the MultiDark
    simulation; the other subvolumes are used for building the covariance matrix.

    Parameters
    ----------
    Mr : (int) 
        Absolute magnitude cutoff M_r. Default M_r = -21.

    b_normal : (float) 
        FoF linking length normalization, b = b_normal * nbar**(-1/3)
    '''
    thr = -1. * np.float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')
    ####model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}

    ####datsubvol = lambda x: util.mask_func(x, 0)
    ####model.populate_mock(halocat, masking_function=datsubvol, enforce_PBC=False)
    model.populate_mock(halocat)

    #all the things necessary for tpcf calculation
    pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    #masking the galaxies outside the subvolume 0
    pos = util.mask_galaxy_table(pos, 0)
    rbins = xi_binedges()
    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cellran_size = [rmax, rmax, rmax]

    #compute number density
    nbar = len(pos) / 200**3.

    # load MD subvolume randoms and RRs
    randoms = data_random(box='md_sub')
    RR = data_RR(box='md_sub')
    NR = len(randoms)

    # compute the tpcf with the Natural estimator
    data_xir = tpcf(pos,
                    rbins,
                    pos,
                    randoms=randoms,
                    period=None,
                    max_sample_size=int(2e5),
                    estimator='Natural',
                    approx_cell1_size=approx_cell1_size,
                    approx_cellran_size=approx_cellran_size,
                    RR_precomputed=RR,
                    NR_precomputed=NR)

    fullvec = np.append(nbar, data_xir)

    #compute gmf
    b = b_normal * (nbar)**(-1. / 3)
    groups = pyfof.friends_of_friends(pos, b)
    w = np.array([len(x) for x in groups])
    gbins = gmf_bins()
    gmf = np.histogram(w, gbins)[0] / (200.**3.)
    fullvec = np.append(fullvec, gmf)

    output_file = data_file(Mr=Mr, b_normal=b_normal)
    np.savetxt(output_file, fullvec)
    return None
Beispiel #22
0
def build_ABC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build the covariance matrix used in ABC for the full [nbar, xi, gmf] data
    vector, using realisations of galaxy mocks generated with the "data" HOD
    parameters in halos from the MultiDark simulation. Covariance matrices for
    different subsets of observables can be extracted from the full covariance 
    matrix by slicing through the corresponding indices. 

    Notes 
    -----
    * This covariance matrix is calculated from realisations of the *entire* MultiDark 
        box, so it does _not_ account for the subvolume sample variance that the MCMC 
        covariance does. 
    '''
    nbars, xir, gmfs = [], [], [] 

    thr = -1. * np.float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
    rbins = xi_binedges()  # some setting for tpcf calculations

    rmax = rbins.max()
    approx_cell1_size = [rmax , rmax , rmax]
    approx_cellran_size = [rmax , rmax , rmax]
    
    # load randoms and RRs for the ENTIRE MultiDark volume 
    ###randoms = data_random(box='md_all')
    ###RR = data_RR(box='md_all')
    ###NR = len(randoms)

    for i in xrange(1,125):
        print 'mock#', i
        # populate the mock over the entire box
        model.populate_mock(halocat)
        # returning the positions of galaxies
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # calculate nbar
        nbars.append(len(pos) / 1000**3.)

        # calculate xi(r) for the ENTIRE MultiDark volume 
        # using the natural estimator DD/RR - 1
        xi = tpcf(
                pos, rbins, period=model.mock.Lbox, 
                max_sample_size=int(3e5), estimator='Natural', 
                approx_cell1_size=approx_cell1_size)
        xir.append(xi)

        # calculate gmf
        nbar = len(pos) / 1000**3.
        b = b_normal * (nbar)**(-1./3) 
        groups = pyfof.friends_of_friends(pos , b)
        w = np.array([len(x) for x in groups])
        gbins = gmf_bins()
        gmf = np.histogram(w , gbins)[0] / (1000.**3.)
        gmfs.append(gmf)  # GMF

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.obvs_dir(), 'abc_nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # write full covariance matrix of various combinations of the data
    # and invert for the likelihood evaluations

    # --- covariance for all three ---
    fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1),
                            np.array(xir),
                            np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)
    fullcorr = np.corrcoef(fulldatarr.T)
    # and save the covariance matrix
    nopoisson_file = ''.join([util.obvs_dir(),
        'ABC.nbar_xi_gmf_cov', '.no_poisson', '.Mr', str(Mr), '.bnorm', str(round(b_normal, 2)), '.dat'])
    np.savetxt(nopoisson_file, fullcov)
    return None
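# Hedged sketch (not in the original source) of the "invert for the likelihood
# evaluations" step mentioned in the comments above: load the saved covariance
# and form a precision matrix. np.linalg.pinv is a conservative choice here in
# case the mock-based estimate is nearly singular; the original analysis may
# invert the covariance differently.
def load_precision_matrix(cov_file):
    fullcov = np.loadtxt(cov_file)
    return np.linalg.pinv(fullcov)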
Beispiel #23
0
def build_ABC_cov_nbar_xi_gmf(Mr=21, b_normal=0.25):
    ''' Build the covariance matrix used in ABC for the full [nbar, xi, gmf] data
    vector, using realisations of galaxy mocks generated with the "data" HOD
    parameters in halos from the MultiDark simulation. Covariance matrices for
    different subsets of observables can be extracted from the full covariance 
    matrix by slicing through the corresponding indices. 

    Notes 
    -----
    * This covariance matrix is calculated from realisations of the *entire* MultiDark 
        box, so it does _not_ account for the subvolume sample variance that the MCMC 
        covariance does. 
    '''
    nbars, xir, gmfs = [], [], []

    thr = -1. * np.float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')
    rbins = xi_binedges()  # some setting for tpcf calculations

    rmax = rbins.max()
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cellran_size = [rmax, rmax, rmax]

    # load randoms and RRs for the ENTIRE MultiDark volume
    ###randoms = data_random(box='md_all')
    ###RR = data_RR(box='md_all')
    ###NR = len(randoms)

    for i in xrange(1, 125):
        print 'mock#', i
        # populate the mock over the entire box
        model.populate_mock(halocat)
        # returning the positions of galaxies
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # calculate nbar
        nbars.append(len(pos) / 1000**3.)

        # calculate xi(r) for the ENTIRE MultiDark volume
        # using the natural estimator DD/RR - 1
        xi = tpcf(pos,
                  rbins,
                  period=model.mock.Lbox,
                  max_sample_size=int(3e5),
                  estimator='Natural',
                  approx_cell1_size=approx_cell1_size)
        xir.append(xi)

        # calculate gmf
        nbar = len(pos) / 1000**3.
        b = b_normal * (nbar)**(-1. / 3)
        groups = pyfof.friends_of_friends(pos, b)
        w = np.array([len(x) for x in groups])
        gbins = gmf_bins()
        gmf = np.histogram(w, gbins)[0] / (1000.**3.)
        gmfs.append(gmf)  # GMF

    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.obvs_dir(), 'abc_nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])

    # write full covariance matrix of various combinations of the data
    # and invert for the likelihood evaluations

    # --- covariance for all three ---
    fulldatarr = np.hstack(
        (np.array(nbars).reshape(len(nbars),
                                 1), np.array(xir), np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)
    fullcorr = np.corrcoef(fulldatarr.T)
    # and save the covariance matrix
    nopoisson_file = ''.join([
        util.obvs_dir(), 'ABC.nbar_xi_gmf_cov', '.no_poisson', '.Mr',
        str(Mr), '.bnorm',
        str(round(b_normal, 2)), '.dat'
    ])
    np.savetxt(nopoisson_file, fullcov)
    return None
Beispiel #24
0
def build_nbar_xi_gmf_cov(Mr=21):
    ''' Build the covariance matrix for the full [nbar, xi, gmf] data vector,
    using realisations of galaxy mocks generated with the "data" HOD parameters in
    halos from the MultiDark simulation. Covariance matrices for different subsets 
    of observables can be extracted from the full covariance matrix by slicing 
    through the corresponding indices. 

    '''
    nbars = []
    xir = []
    gmfs = []

    thr = -1. * np.float(Mr)
    model = PrebuiltHodModelFactory('zheng07', threshold=thr)
    halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
    #some settings for tpcf calculations
    rbins = hardcoded_xi_bins()

    for i in xrange(1,125):
        print 'mock#', i

        # populate the mock over the entire box
        model.populate_mock(halocat)
        # returning the positions of galaxies
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        # calculate nbar
        nbars.append(len(pos) / 1000**3.)
        # calculate xi(r) over the full periodic box (no randoms needed)
        xi = tpcf(pos, rbins, period = model.mock.Lbox, 
                  max_sample_size=int(2e5), estimator='Landy-Szalay')
        xir.append(xi)
        # calculate gmf
        nbar = len(pos) / 1000**3.
        b_normal = 0.75
        b = b_normal * (nbar)**(-1./3) 
        groups = pyfof.friends_of_friends(pos , b)
        w = np.array([len(x) for x in groups])
        gbins = gmf_bins()
        gmf = np.histogram(w , gbins)[0] / (1000.**3.)
        gmfs.append(gmf)  # GMF
    # save nbar variance
    nbar_var = np.var(nbars, axis=0, ddof=1)
    nbar_file = ''.join([util.multidat_dir(), 'abc_nbar_var.Mr', str(Mr), '.dat'])
    np.savetxt(nbar_file, [nbar_var])


    # write full covariance matrix of various combinations of the data
    # and invert for the likelihood evaluations

    # --- covariance for all three ---
    fulldatarr = np.hstack((np.array(nbars).reshape(len(nbars), 1),
                            np.array(xir),
                            np.array(gmfs)))
    fullcov = np.cov(fulldatarr.T)
    fullcorr = np.corrcoef(fulldatarr.T)
    # and save the covariance matrix
    nopoisson_file = ''.join([util.multidat_dir(), 'abc_nbar_xi_gmf_cov.no_poisson.Mr', str(Mr), '.dat'])
    np.savetxt(nopoisson_file, fullcov)

    # and a correlation matrix
    full_corr_file = ''.join([util.multidat_dir(), 'abc_nbar_xi_gmf_corr.Mr', str(Mr), '.dat'])
    np.savetxt(full_corr_file, fullcorr)

    return None
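# Small illustrative wrapper (a sketch, not from the original module) for the
# group multiplicity function computation repeated in the loops above: FoF groups
# with linking length b = b_normal * nbar**(-1/3), histogrammed in group richness
# and normalized by the box volume.
def compute_gmf(pos, box_volume, b_normal=0.25):
    nbar = len(pos) / float(box_volume)
    b = b_normal * nbar ** (-1. / 3)
    groups = pyfof.friends_of_friends(pos, b)
    richness = np.array([len(grp) for grp in groups])
    return np.histogram(richness, gmf_bins())[0] / float(box_volume)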
Beispiel #25
0
def test_precomputed_rr(Nr, Mr = 21):
    '''
    Mr = Luminosity threshold
    Nr = Number of randoms
    '''

    rbins = np.logspace(-1, 1.25, 15)
    rmax = rbins.max()
    rbin_centers = (rbins[1:] + rbins[0:-1])/2.    

    halocat = CachedHaloCatalog(simname = 'bolshoi', redshift = 0)
    model = PrebuiltHodModelFactory("zheng07")
    model.populate_mock(halocat = halocat, enforce_PBC = False)
    data = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    print data.shape
    L = halocat.Lbox

    xmin , ymin , zmin = 0., 0., 0.
    xmax , ymax , zmax = L, L, L
   
    
    num_randoms = Nr
    xran = np.random.uniform(xmin, xmax, num_randoms)
    yran = np.random.uniform(ymin, ymax, num_randoms)
    zran = np.random.uniform(zmin, zmax, num_randoms)
    randoms = np.vstack((xran, yran, zran)).T
    

    verbose = False
    num_threads = cpu_count()

    period = None
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cell2_size = approx_cell1_size
    approx_cellran_size = [rmax, rmax, rmax]

    normal_result = tpcf(
            data, rbins, data, 
            randoms=randoms, period = period, 
            max_sample_size=int(1e4), estimator='Landy-Szalay', 
            approx_cell1_size=approx_cell1_size, 
            approx_cellran_size=approx_cellran_size)


    #count data pairs
    DD = npairs(
            data, data, rbins, period,
            verbose, num_threads,
            approx_cell1_size, approx_cell2_size)
    DD = np.diff(DD)
    #count random pairs
    RR = npairs(
            randoms, randoms, rbins, period,
            verbose, num_threads,
            approx_cellran_size, approx_cellran_size)
    RR = np.diff(RR)
    #count data random pairs 
    DR = npairs(
            data, randoms, rbins, period,
            verbose, num_threads,
            approx_cell1_size, approx_cell2_size)
    DR = np.diff(DR)

    print "DD=", DD
    print "DR=", DR
    print "RR=", RR        
    
    ND = len(data)
    NR = len(randoms)

    # cast to float so the pair-count ratios are not truncated by integer division
    factor1 = ND * ND / float(NR * NR)
    factor2 = ND * NR / float(NR * NR)

    RRf = RR.astype(float)
    xi_LS = (1.0 / factor1) * (DD / RRf) - (1.0 / factor2) * (2.0 * DR / RRf) + 1.0

    print "xi=" , xi_LS
    print "normal=" , normal_result

    result_with_RR_precomputed = tpcf(
            data, rbins, data, 
            randoms=randoms, period = period, 
            max_sample_size=int(1e5), estimator='Landy-Szalay', 
            approx_cell1_size=approx_cell1_size, 
            approx_cellran_size=approx_cellran_size, 
            RR_precomputed = RR, 
            NR_precomputed = NR)
    
    print "xi_pre=" , result_with_RR_precomputed
Beispiel #26
0
# (code fragment) num_randoms, xran, yran, xi_bin and rmax are assumed to be
# defined earlier in the original script; plausible stand-ins, matching the
# other MultiDark examples, would be:
#   num_randoms = 50 * 8000 * 64   # assumed random count for the 1000 Mpc/h box
#   xran = np.random.uniform(0, 1000, num_randoms)
#   yran = np.random.uniform(0, 1000, num_randoms)
#   xi_bin = np.logspace(-1, 1.25, 15)
#   rmax = xi_bin.max()
zran = np.random.uniform(0, 1000, num_randoms)
full_randoms = np.vstack((xran, yran, zran)).T

# Get the full box mock
model = PrebuiltHodModelFactory('zheng07', threshold=-21)
halocat = CachedHaloCatalog(simname='multidark', redshift=0,
                            halo_finder='rockstar')
model.populate_mock(halocat, enforce_PBC=False)
pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

# Get full tpcf
print "getting full vol tpcf..."
xi_full_pc = tpcf(pos, xi_bin,
                  randoms=full_randoms,
                  do_auto=True, do_cross=False,
                  max_sample_size=int(pos.shape[0]),
                  estimator='Natural',
                  approx_cell1_size=[rmax, rmax, rmax],
                  approx_cellran_size=[rmax, rmax, rmax])
print "done"

Nsub = 8

# Now set up for subvol boxes
num_randoms = 50 * 8000
xran = np.random.uniform(0, 250, num_randoms)
yran = np.random.uniform(0, 250, num_randoms)
zran = np.random.uniform(0, 250, num_randoms)
sub_randoms = np.vstack((xran, yran, zran)).T

sub_model = PrebuiltHodModelFactory('zheng07')
Beispiel #27
0
def test_precomputed_rr(Nr, Mr=21):
    '''
    Mr = Luminosity threshold
    Nr = Number of randoms
    '''

    rbins = np.logspace(-1, 1.25, 15)
    rmax = rbins.max()
    rbin_centers = (rbins[1:] + rbins[0:-1]) / 2.

    halocat = CachedHaloCatalog(simname='bolshoi', redshift=0)
    model = PrebuiltHodModelFactory("zheng07")
    model.populate_mock(halocat=halocat, enforce_PBC=False)
    data = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    print data.shape
    L = halocat.Lbox

    xmin, ymin, zmin = 0., 0., 0.
    xmax, ymax, zmax = L, L, L

    num_randoms = Nr
    xran = np.random.uniform(xmin, xmax, num_randoms)
    yran = np.random.uniform(ymin, ymax, num_randoms)
    zran = np.random.uniform(zmin, zmax, num_randoms)
    randoms = np.vstack((xran, yran, zran)).T

    verbose = False
    num_threads = cpu_count()

    period = None
    approx_cell1_size = [rmax, rmax, rmax]
    approx_cell2_size = approx_cell1_size
    approx_cellran_size = [rmax, rmax, rmax]

    normal_result = tpcf(data,
                         rbins,
                         data,
                         randoms=randoms,
                         period=period,
                         max_sample_size=int(1e4),
                         estimator='Landy-Szalay',
                         approx_cell1_size=approx_cell1_size,
                         approx_cellran_size=approx_cellran_size)

    #count data pairs
    DD = npairs(data, data, rbins, period, verbose, num_threads,
                approx_cell1_size, approx_cell2_size)
    DD = np.diff(DD)
    #count random pairs
    RR = npairs(randoms, randoms, rbins, period, verbose, num_threads,
                approx_cellran_size, approx_cellran_size)
    RR = np.diff(RR)
    #count data random pairs
    DR = npairs(data, randoms, rbins, period, verbose, num_threads,
                approx_cell1_size, approx_cell2_size)
    DR = np.diff(DR)

    print "DD=", DD
    print "DR=", DR
    print "RR=", RR

    ND = len(data)
    NR = len(randoms)

    # cast to float so the pair-count ratios are not truncated by integer division
    factor1 = ND * ND / float(NR * NR)
    factor2 = ND * NR / float(NR * NR)

    RRf = RR.astype(float)
    xi_LS = (1.0 / factor1) * (DD / RRf) - (1.0 / factor2) * (2.0 * DR / RRf) + 1.0

    print "xi=", xi_LS
    print "normal=", normal_result

    result_with_RR_precomputed = tpcf(data,
                                      rbins,
                                      data,
                                      randoms=randoms,
                                      period=period,
                                      max_sample_size=int(1e5),
                                      estimator='Landy-Szalay',
                                      approx_cell1_size=approx_cell1_size,
                                      approx_cellran_size=approx_cellran_size,
                                      RR_precomputed=RR,
                                      NR_precomputed=NR)

    print "xi_pre=", result_with_RR_precomputed
Beispiel #28
0
def Subvolume_Analytic(N_sub, ratio=False):
    ''' Test the 2PCF estimates from the MultiDark subvolumes against the 
    analytic 2PCF for the entire MultiDark volume

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample

    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_test', '.Nsub',
        str(N_sub), '.p'
    ])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()

    if os.path.isfile(pickle_file):
        data_dump = pickle.load(open(pickle_file, 'rb'))
        full_xi = data_dump['full_xi']
    else:
        # Entire MultiDark Volume (Analytic xi)
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark',
                                    redshift=0,
                                    halo_finder='rockstar')

        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # although the estimator is set to 'Landy-Szalay', I highly suspect it
        # effectively reduces to the Natural estimator here, since DR pairs
        # cannot be calculated from analytic randoms
        full_xi = tpcf(pos,
                       xi_bin,
                       period=model.mock.Lbox,
                       max_sample_size=int(2e5),
                       estimator='Landy-Szalay',
                       num_threads=1)
        data_dump = {}
        data_dump['full_xi'] = full_xi

    if not ratio:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 full_xi,
                 lw=2,
                 ls='-',
                 c='k',
                 label=r'Analytic $\xi$ Entire Volume')

    if not os.path.isfile(pickle_file):
        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark',
                                        redshift=0,
                                        halo_finder='rockstar')
        RR = data_RR()
        randoms = data_random()
        NR = len(randoms)

    for method in ['Landy-Szalay', 'Natural']:

        if method == 'Landy-Szalay':
            iii = 3
        elif method == 'Natural':
            iii = 5

        if not os.path.isfile(pickle_file):
            sub_xis_list = []
            sub_xis = np.zeros(len(full_xi))

            for ii in range(1, N_sub + 1):
                # randomly sample one of the subvolumes
                rint = ii  #np.random.randint(1, 125)
                simsubvol = lambda x: util.mask_func(x, rint)
                sub_model.populate_mock(sub_halocat,
                                        masking_function=simsubvol,
                                        enforce_PBC=False)

                pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x',
                                           'y', 'z')

                xi, yi, zi = util.random_shifter(rint)
                temp_randoms = randoms.copy()
                temp_randoms[:, 0] += xi
                temp_randoms[:, 1] += yi
                temp_randoms[:, 2] += zi

                rmax = xi_bin.max()
                approx_cell1_size = [rmax, rmax, rmax]
                approx_cellran_size = [rmax, rmax, rmax]

                sub_xi = tpcf(pos,
                              xi_bin,
                              pos,
                              randoms=temp_randoms,
                              period=None,
                              max_sample_size=int(1e5),
                              estimator=method,
                              approx_cell1_size=approx_cell1_size,
                              approx_cellran_size=approx_cellran_size,
                              RR_precomputed=RR,
                              NR_precomputed=NR)
                label = None
                if ii == N_sub:  # label only the last subvolume
                    label = 'Subvolumes'

                #if not ratio:
                #    sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi, lw=0.5, ls='--', c=pretty_colors[iii])
                sub_xis += sub_xi
                sub_xis_list.append(sub_xi)

            sub_xi_avg = sub_xis / np.float(N_sub)

            data_dump[method] = {}
            data_dump[method]['sub_xi_avg'] = sub_xi_avg
            data_dump[method]['sub_xis_list'] = sub_xis_list
        else:
            sub_xis_list = data_dump[method]['sub_xis_list']
            sub_xi_avg = data_dump[method]['sub_xi_avg']

        if not ratio:
            sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                     sub_xi_avg,
                     lw=2,
                     ls='--',
                     c=pretty_colors[iii],
                     label='Subvolume ' + method)
        else:
            sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                     sub_xi_avg / full_xi,
                     lw=2,
                     ls='--',
                     c=pretty_colors[iii],
                     label='Subvolume ' + method)

    if not os.path.isfile(pickle_file):
        pickle.dump(data_dump, open(pickle_file, 'wb'))

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_analytic.Nsub',
            str(N_sub), '.ratio.png'
        ])
    else:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_analytic.Nsub',
            str(N_sub), '.png'
        ])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None