Beispiel #1
0
def PlotCovariance(obvs, Mr=21, b_normal=0.25, inference='mcmc'):
    ''' Plot the covariance matrix for a specified obvs 
    '''
    # import the covariance matrix 
    covar = Data.data_cov(Mr=Mr, b_normal=b_normal, inference=inference)

    if obvs == 'xi': 
        obvs_cov = covar[1:16 , 1:16]
        r_bin = Data.xi_binedges()
    elif obvs == 'gmf': 
        obvs_cov = covar[17:, 17:]
        binedges = Data.data_gmf_bins()
        r_bin = 0.5 * (binedges[:-1] + binedges[1:]) 
    
    n_bin = int(np.sqrt(obvs_cov.size))
    
    # calculate the reduced covariance for plotting
    red_covar = np.zeros([n_bin, n_bin])
    for ii in range(n_bin): 
        for jj in range(n_bin): 
            red_covar[ii][jj] = obvs_cov[ii][jj]/np.sqrt(obvs_cov[ii][ii] * obvs_cov[jj][jj])
    
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    cont = sub.pcolormesh(r_bin, r_bin, red_covar, cmap=plt.cm.afmhot_r)
    plt.colorbar(cont)

    sub.set_xlim([r_bin[0], r_bin[-1]])
    sub.set_ylim([r_bin[0], r_bin[-1]])
    sub.set_xscale('log')
    sub.set_yscale('log')

    sub.set_xlabel(r'$\mathtt{r}\;[\mathtt{Mpc/h}$]', fontsize=25)
    sub.set_ylabel(r'$\mathtt{r}\;[\mathtt{Mpc/h}$]', fontsize=25)
    fig_file = ''.join([util.fig_dir(),
        obvs.upper(), 'covariance',
        '.Mr', str(Mr), 
        '.bnorm', str(round(b_normal,2)), 
        '.', inference, '_inf.png'])
    fig.savefig(fig_file, bbox_inches='tight') 
    plt.close()
    return None 
Beispiel #2
0
def plot_data(Mr = 21 , output_dir = None):
    '''
    plot summary statistics of the data
    '''
    if output_dir is None:
        fig_dir = util.fig_dir()
    else:
        fig_dir = output_dir

    cov = Data.data_cov()
    xi_err = np.sqrt(np.diag(cov)[1:16])
    gmf_err = np.sqrt(np.diag(cov)[16:]) 
   
    fig , axes = plt.subplots(1, 2, figsize=(10, 5))
    fig.subplots_adjust(wspace=0.4, hspace=0.4)
    ax = axes[0]
    x = Data.xi_binedges()
    y = Data.data_xi() 
    ax.errorbar(0.5*(x[:-1]+x[1:]), y, yerr=xi_err, fmt=".k", capsize=0)
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.set_xlim(0.2, 22)
    ax.set_xlabel(r'$r[\mathrm{Mpc}/h]$')
    ax.set_ylabel(r'$\xi(r)$')
    
    ax = axes[1]
    x = Data.gmf_bins()
    y = Data.data_gmf() 
    ax.errorbar(0.5*(x[:-1]+x[1:]), y, yerr=gmf_err, fmt=".k", capsize=0)
    ax.set_xlim(1, 20)
    ax.set_yscale('log')
    ax.set_xlabel(r'Group Richness $N$')
    ax.set_ylabel(r'$\zeta(N)$')

    fig_file = ''.join([fig_dir, "data", '_Mr', str(Mr),'.pdf'])
    plt.savefig(fig_file)
    plt.close()
Beispiel #3
0
def plot_data(Mr=21, output_dir=None):
    '''
    plot summary statistics of the data
    '''
    if output_dir is None:
        fig_dir = util.fig_dir()
    else:
        fig_dir = output_dir

    cov = Data.data_cov()
    xi_err = np.sqrt(np.diag(cov)[1:16])
    gmf_err = np.sqrt(np.diag(cov)[16:])

    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    fig.subplots_adjust(wspace=0.4, hspace=0.4)
    ax = axes[0]
    x = Data.xi_binedges()
    y = Data.data_xi()
    ax.errorbar(0.5 * (x[:-1] + x[1:]), y, yerr=xi_err, fmt=".k", capsize=0)
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.set_xlim(0.2, 22)
    ax.set_xlabel(r'$r[\mathrm{Mpc}/h]$')
    ax.set_ylabel(r'$\xi(r)$')

    ax = axes[1]
    x = Data.gmf_bins()
    y = Data.data_gmf()
    ax.errorbar(0.5 * (x[:-1] + x[1:]), y, yerr=gmf_err, fmt=".k", capsize=0)
    ax.set_xlim(1, 20)
    ax.set_yscale('log')
    ax.set_xlabel(r'Group Richness $N$')
    ax.set_ylabel(r'$\zeta(N)$')

    fig_file = ''.join([fig_dir, "data", '_Mr', str(Mr), '.pdf'])
    plt.savefig(fig_file)
    plt.close()
Beispiel #4
0
def Subvolume_FullvolumeCut(N_sub, ratio=False): 
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 
    2PCF for the entire MultiDark volume WITHOUT periodic boundary conditions
    and actual pair counts, CUT into subvolumes of the same size *AFTER*
    populate mock 


    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample

    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/', 
        'xi_subvolume_fullvolume_cut_test', 
        '.Nsub', str(N_sub), 
        '.p'])
    
    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges() 
    
    # Entire MultiDark Volume (No Periodic Boundary Conditions) 
    model = PrebuiltHodModelFactory('zheng07', threshold=-21)
    halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')

    sub_RR = data_RR(box='md_sub')
    sub_randoms = data_random(box='md_sub')
    sub_NR = len(sub_randoms)

    rmax = xi_bin.max()
    full_approx_cell1_size = [rmax , rmax , rmax]
    full_approx_cellran_size = [rmax , rmax , rmax]

    model.populate_mock(halocat, enforce_PBC=False)
    subvol_id = util.mk_id_column(table=model.mock.galaxy_table)
    full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
    
    # Full Volume 
    if os.path.isfile(pickle_file):
        data_dump = pickle.load(open(pickle_file, 'rb'))
        full_xi = data_dump['full_xi']
    else: 
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')

        full_randoms = data_random(box='md_all')
        full_RR = data_RR(box='md_all')
        full_NR = len(full_randoms)

        rmax = xi_bin.max()
        full_approx_cell1_size = [rmax , rmax , rmax]
        full_approx_cellran_size = [rmax , rmax , rmax]

        model.populate_mock(halocat, enforce_PBC=False)
        full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        
        full_xi = tpcf(
                full_pos, xi_bin, 
                randoms=full_randoms, period=None, 
                do_auto=True,
                do_cross=False, 
                num_threads=5, 
                max_sample_size=int(full_pos.shape[0]), estimator='Natural', 
                approx_cell1_size=full_approx_cell1_size, 
                approx_cellran_size=full_approx_cellran_size,
                RR_precomputed = full_RR,
                NR_precomputed = full_NR)
        data_dump = {} 
        data_dump['full_xi'] = full_xi
    
    if not ratio:  
        sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), full_xi, 
                lw=2, ls='-', c='k', label=r'Full Volume') 

    if os.path.isfile(pickle_file):
        fullcut_xi_list = data_dump['fullcut_xi']['fullcut_xi_list']
        fullcut_xi_avg =  data_dump['fullcut_xi']['fullcut_xi_avg']
    else: 
        data_dump['fullcut_xi'] = {}
        fullcut_xi_list = [] 
        fullcut_xi_tot = np.zeros(len(xi_bin)-1)
        for id in np.unique(subvol_id)[:N_sub]: 
            print 'Subvolume ', id
            in_cut = np.where(subvol_id == id) 

            fullcut_pos = full_pos[in_cut]

            fullcut_xi = tpcf(
                    fullcut_pos, xi_bin, 
                    randoms=sub_randoms, period=None, 
                    do_auto=True,
                    do_cross=False, 
                    num_threads=5, 
                    max_sample_size=int(fullcut_pos.shape[0]), estimator='Natural', 
                    approx_cell1_size=full_approx_cell1_size, 
                    approx_cellran_size=full_approx_cellran_size,
                    RR_precomputed=sub_RR,
                    NR_precomputed=sub_NR)

            fullcut_xi_list.append(fullcut_xi)
            fullcut_xi_tot += fullcut_xi
        
        fullcut_xi_avg = fullcut_xi_tot / np.float(N_sub)
        data_dump['fullcut_xi']['fullcut_xi_list']= fullcut_xi_list
        data_dump['fullcut_xi']['fullcut_xi_avg']= fullcut_xi_avg

    if not ratio:  
        sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), fullcut_xi_avg, 
                lw=2, ls='-', c='k', label=r'Full Volume Cut Average') 
    else: 
        sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), fullcut_xi_avg/full_xi, 
                lw=2, ls='-', c='k', label=r'Full Volume Cut Average') 
    
    if not os.path.isfile(pickle_file):
        # MultiDark SubVolume (precomputed RR pairs) 
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
        sub_RR = data_RR(box='md_sub')
        sub_randoms = data_random(box='md_sub')
        sub_NR = len(sub_randoms)
    
        
        sub_xis_list = [] 
        sub_xis = np.zeros(len(full_xi)) 

        for ii in range(1,N_sub): 
            print 'Subvolume ', ii 
            # randomly sample one of the subvolumes
            rint = ii #np.random.randint(1, 125)
            simsubvol = lambda x: util.mask_func(x, rint)
            sub_model.populate_mock(sub_halocat, masking_function=simsubvol, enforce_PBC=False)
               
            pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y', 'z')

            xi, yi , zi = util.random_shifter(rint)
            temp_randoms = sub_randoms.copy()
            temp_randoms[:,0] += xi
            temp_randoms[:,1] += yi
            temp_randoms[:,2] += zi
            
            rmax = xi_bin.max()
            sub_approx_cell1_size = [rmax , rmax , rmax]
            sub_approx_cellran_size = [rmax , rmax , rmax]

            sub_xi = tpcf(
                    pos, xi_bin,  
                    randoms=temp_randoms, 
                    period = None, 
                    do_auto=True,
                    do_cross=False, 
                    num_threads=5, 
                    max_sample_size=int(pos.shape[0]), 
                    estimator='Natural', 
                    approx_cell1_size = sub_approx_cell1_size, 
                    approx_cellran_size = sub_approx_cellran_size,
                    RR_precomputed=sub_RR,
                    NR_precomputed=sub_NR)

            label = None 
            if ii == N_sub - 1: 
                label = 'Subvolumes'
            
            sub_xis += sub_xi
            sub_xis_list.append(sub_xi)

        sub_xi_avg = sub_xis/np.float(N_sub)

        data_dump['Natural'] = {} 
        data_dump['Natural']['sub_xi_avg'] = sub_xi_avg
        data_dump['Natural']['sub_xis_list'] = sub_xis_list 
    else: 
        sub_xis_list = data_dump['Natural']['sub_xis_list']
        sub_xi_avg = data_dump['Natural']['sub_xi_avg'] 

    if not os.path.isfile(pickle_file): 
        pickle.dump(data_dump, open(pickle_file, 'wb')) 

    if not ratio: 
        sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg, 
                lw=2, ls='--', c=pretty_colors[3], label='Subvolume')
    else: 
        sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg/full_xi, 
                lw=2, ls='--', c=pretty_colors[3], label='Subvolume')

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')
    
    if not ratio: 
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else: 
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$", fontsize=25)

    sub.legend(loc='lower left')
    
    if ratio: 
        fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub', str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub', str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None 
Beispiel #5
0
def Subvolume_Analytic(N_sub, ratio=False): 
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 
    analytic 2PCF for the entire MultiDark volume

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample

    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/', 
        'xi_subvolume_test', 
        '.Nsub', str(N_sub), 
        '.p'])
    
    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges() 
    
    if os.path.isfile(pickle_file):
        data_dump = pickle.load(open(pickle_file, 'rb'))
        full_xi = data_dump['full_xi']
    else: 
        # Entire MultiDark Volume (Analytic xi) 
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
        
        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')
        
        # while the estimator claims to be Landy-Szalay, I highly suspect it
        # actually uses Landy-Szalay since DR pairs cannot be calculated from 
        # analytic randoms
        full_xi = tpcf(pos, xi_bin, period=model.mock.Lbox, max_sample_size=int(2e5), estimator='Landy-Szalay', num_threads=1)
        data_dump = {} 
        data_dump['full_xi'] = full_xi

    if not ratio:  
        sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), full_xi, lw=2, ls='-', c='k', label=r'Analytic $\xi$ Entire Volume') 
    
    if not os.path.isfile(pickle_file):
        # MultiDark SubVolume (precomputed RR pairs) 
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname = 'multidark', redshift = 0, halo_finder = 'rockstar')
        RR = data_RR()
        randoms = data_random()
        NR = len(randoms)
    
    for method in ['Landy-Szalay', 'Natural']:
        
        if method == 'Landy-Szalay': 
            iii = 3
        elif method == 'Natural': 
            iii = 5
        
        if not os.path.isfile(pickle_file): 
            sub_xis_list = [] 
            sub_xis = np.zeros(len(full_xi)) 

            for ii in range(1,N_sub+1): 
                # randomly sample one of the subvolumes
                rint = ii #np.random.randint(1, 125)
                simsubvol = lambda x: util.mask_func(x, rint)
                sub_model.populate_mock(sub_halocat, masking_function=simsubvol, enforce_PBC=False)
                   
                pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y', 'z')

                xi, yi , zi = util.random_shifter(rint)
                temp_randoms = randoms.copy()
                temp_randoms[:,0] += xi
                temp_randoms[:,1] += yi
                temp_randoms[:,2] += zi
                
                rmax = xi_bin.max()
                approx_cell1_size = [rmax , rmax , rmax]
                approx_cellran_size = [rmax , rmax , rmax]

                sub_xi = tpcf(
                        pos, xi_bin, pos, 
                        randoms=temp_randoms, 
                        period = None, 
                        max_sample_size=int(1e5), 
                        estimator=method, 
                        approx_cell1_size = approx_cell1_size, 
                        approx_cellran_size = approx_cellran_size,
                        RR_precomputed=RR,
                        NR_precomputed=NR)
                label = None 
                if ii == N_sub - 1: 
                    label = 'Subvolumes'
                
                #if not ratio: 
                #    sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi, lw=0.5, ls='--', c=pretty_colors[iii])
                sub_xis += sub_xi
                sub_xis_list.append(sub_xi)

            sub_xi_avg = sub_xis/np.float(N_sub)

            data_dump[method] = {} 
            data_dump[method]['sub_xi_avg'] = sub_xi_avg
            data_dump[method]['sub_xis_list'] = sub_xis_list 
        else: 
            sub_xis_list = data_dump[method]['sub_xis_list']
            sub_xi_avg = data_dump[method]['sub_xi_avg'] 

        if not ratio: 
            sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg, 
                    lw=2, ls='--', c=pretty_colors[iii], label='Subvolume '+method)
        else: 
            sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi_avg/full_xi, 
                    lw=2, ls='--', c=pretty_colors[iii], label='Subvolume '+method)
    
    if not os.path.isfile(pickle_file): 
        pickle.dump(data_dump, open(pickle_file, 'wb')) 

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')
    
    if not ratio: 
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else: 
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$", fontsize=25)

    sub.legend(loc='lower left')
    
    if ratio: 
        fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_analytic.Nsub', str(N_sub), '.ratio.png'])
    else:
        fig_file = ''.join([util.fig_dir(), 'test_xi_subvolume_analytic.Nsub', str(N_sub), '.png'])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None 
Beispiel #6
0
    def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
        '''
        Given theta, sum_stat calculates the observables from our forward model

        Parameters
        ----------
        theta : (self explanatory)
        prior_range : If specified, checks to make sure that theta is within the prior range.
        '''
        self.model.param_dict['logM0'] = theta[0]
        self.model.param_dict['sigma_logM'] = np.exp(theta[1])
        self.model.param_dict['logMmin'] = theta[2]
        self.model.param_dict['alpha'] = theta[3]
        self.model.param_dict['logM1'] = theta[4]

        rbins = xi_binedges()
        rmax = rbins.max()
        approx_cell1_size = [rmax, rmax, rmax]
        approx_cellran_size = [rmax, rmax, rmax]

        if prior_range is None:

            self.model.populate_mock(self.halocat, enforce_PBC=False)
            pos = three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y',
                                       'z')
            obvs = []

            for obv in observables:
                if obv == 'nbar':
                    obvs.append(len(pos) /
                                1000.**3.)  # nbar of the galaxy catalog
                elif obv == 'gmf':
                    nbar = len(pos) / 1000**3.
                    b = self.b_normal * (nbar)**(-1. / 3)
                    groups = pyfof.friends_of_friends(pos, b)
                    w = np.array([len(x) for x in groups])
                    gbins = data_gmf_bins()
                    gmf = np.histogram(w, gbins)[0] / (1000.**3.)
                    obvs.append(gmf)
                elif obv == 'xi':
                    greek_xi = tpcf(pos,
                                    rbins,
                                    pos,
                                    randoms=randoms,
                                    period=None,
                                    max_sample_size=int(3e5),
                                    estimator='Natural',
                                    approx_cell1_size=approx_cell1_size,
                                    approx_cellran_size=approx_cellran_size,
                                    RR_precomputed=self.RR,
                                    NR_precomputed=self.NR)
                    obvs.append(greek_xi)
                else:
                    raise NotImplementedError(
                        'Only nbar 2pcf, gmf implemented so far')

            return obvs

        else:
            if np.all((prior_range[:, 0] < theta)
                      & (theta < prior_range[:, 1])):
                # if all theta_i is within prior range ...
                try:

                    self.model.populate_mock(self.halocat)
                    pos = three_dim_pos_bundle(self.model.mock.galaxy_table,
                                               'x', 'y', 'z')
                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(len(pos) /
                                        1000**3.)  # nbar of the galaxy catalog
                        elif obv == 'gmf':
                            nbar = len(pos) / 1000**3.
                            b = self.b_normal * (nbar)**(-1. / 3)
                            groups = pyfof.friends_of_friends(pos, b)
                            w = np.array([len(x) for x in groups])
                            gbins = data_gmf_bins()
                            gmf = np.histogram(w, gbins)[0] / (1000.**3.)
                            obvs.append(gmf)
                        elif obv == 'xi':
                            greek_xi = tpcf(
                                pos,
                                rbins,
                                pos,
                                randoms=randoms,
                                period=None,
                                max_sample_size=int(3e5),
                                estimator='Natural',
                                approx_cell1_size=approx_cell1_size,
                                approx_cellran_size=approx_cellran_size,
                                RR_precomputed=self.RR,
                                NR_precomputed=self.NR)
                            obvs.append(greek_xi)
                        else:
                            raise NotImplementedError(
                                'Only nbar, tpcf, and gmf are implemented so far'
                            )

                    return obvs

                except ValueError:

                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(10.)
                        elif obv == 'gmf':
                            bins = data_gmf_bins()
                            obvs.append(np.ones_like(bins)[:-1] * 1000.)
                        elif obv == 'xi':
                            obvs.append(np.zeros(len(xi_binedges()[:-1])))
                    return obvs
            else:
                obvs = []
                for obv in observables:
                    if obv == 'nbar':
                        obvs.append(10.)
                    elif obv == 'gmf':
                        bins = data_gmf_bins()
                        obvs.append(np.ones_like(bins)[:-1] * 1000.)
                    elif obv == 'xi':
                        obvs.append(np.zeros(len(xi_binedges()[:-1])))
                return obvs
Beispiel #7
0
    def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
        '''
        Given theta, sum_stat calculates the observables from our forward model

        Parameters
        ----------
        theta : (self explanatory)
        prior_range : If specified, checks to make sure that theta is within the prior range.
        '''
        self.model.param_dict['logM0'] = theta[0]
        self.model.param_dict['sigma_logM'] = np.exp(theta[1])
        self.model.param_dict['logMmin'] = theta[2]
        self.model.param_dict['alpha'] = theta[3]
        self.model.param_dict['logM1'] = theta[4]

        rbins = xi_binedges()
        rmax = rbins.max()
        approx_cell1_size = [rmax , rmax , rmax]
        approx_cellran_size = [rmax , rmax , rmax]

        if prior_range is None:
            
            self.model.populate_mock(self.halocat)
            pos =three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z')
            obvs = []

            for obv in observables:
                if obv == 'nbar':
                    obvs.append(len(pos) / 1000.**3.)       # nbar of the galaxy catalog
                elif obv == 'gmf':
                    nbar = len(pos) / 1000**3.
    		    b = self.b_normal * (nbar)**(-1./3) 
    		    groups = pyfof.friends_of_friends(pos , b)
    		    w = np.array([len(x) for x in groups])
    		    gbins =data_gmf_bins()
    		    gmf = np.histogram(w , gbins)[0] / (1000.**3.)
                    obvs.append(gmf)   
                elif obv == 'xi':
                    greek_xi = tpcf(
                            pos, rbins,  
                            period=self.model.mock.Lbox, 
                            max_sample_size=int(3e5), estimator='Natural', 
                            approx_cell1_size=approx_cell1_size)
                    obvs.append(greek_xi)
                else:
                    raise NotImplementedError('Only nbar 2pcf, gmf implemented so far')

            return obvs

        else:
            if np.all((prior_range[:,0] < theta) & (theta < prior_range[:,1])):
                # if all theta_i is within prior range ...
                try:


                    self.model.populate_mock(self.halocat) 
                    pos=three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z')
            	    obvs = []
            	    for obv in observables:
                        if obv == 'nbar':
                    	    obvs.append(len(pos) / 1000**3.)       # nbar of the galaxy catalog
                        elif obv == 'gmf':
                    	    nbar = len(pos) / 1000**3.
    		    	    b = self.b_normal * (nbar)**(-1./3) 
    		    	    groups = pyfof.friends_of_friends(pos , b)
    		    	    w = np.array([len(x) for x in groups])
    		    	    gbins =data_gmf_bins()
    		    	    gmf = np.histogram(w , gbins)[0] / (1000.**3.)
                    	    obvs.append(gmf)   
                        elif obv == 'xi':
                            greek_xi = tpcf(
                                    pos, rbins, period=self.model.mock.Lbox, 
                                    max_sample_size=int(3e5), estimator='Natural', 
                                    approx_cell1_size=approx_cell1_size)
                            obvs.append(greek_xi)
                        else:
                            raise NotImplementedError('Only nbar, tpcf, and gmf are implemented so far')

                    return obvs

                except ValueError:

                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(10.)
                        elif obv == 'gmf':
                            bins = data_gmf_bins()
                            obvs.append(np.ones_like(bins)[:-1]*1000.)
                        elif obv == 'xi':
                            obvs.append(np.zeros(len(xi_binedges()[:-1])))
                    return obvs
            else:
                obvs = []
                for obv in observables:
                    if obv == 'nbar':
                        obvs.append(10.)
                    elif obv == 'gmf':
                        bins = data_gmf_bins()
                        obvs.append(np.ones_like(bins)[:-1]*1000.)
                    elif obv == 'xi':
                        obvs.append(np.zeros(len(xi_binedges()[:-1])))
                return obvs
Beispiel #8
0
    def _sum_stat(self, theta, prior_range=None, observables=['nbar', 'gmf']):
        '''
        Given theta, sum_stat calculates the observables from our forward model

        Parameters
        ----------
        theta : (self explanatory)
        prior_range : If specified, checks to make sure that theta is within the prior range.
        '''
        self.model.param_dict['logM0'] = theta[0]
        self.model.param_dict['sigma_logM'] = np.exp(theta[1])
        self.model.param_dict['logMmin'] = theta[2]
        self.model.param_dict['alpha'] = theta[3]
        self.model.param_dict['logM1'] = theta[4]

        rbins = xi_binedges()
        rmax = rbins.max()
        period = None
        approx_cell1_size = [rmax , rmax , rmax]
        approx_cellran_size = [rmax , rmax , rmax]

        if prior_range is None:
            rint = np.random.randint(1, 125)
            ####simsubvol = lambda x: util.mask_func(x, rint)
            ####self.model.populate_mock(self.halocat,
            ####                masking_function=simsubvol,
            ####                enforce_PBC=False)
            self.model.populate_mock(self.halocat)
                        
            pos =three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z')
            pos = util.mask_galaxy_table(pos , rint) 

            xi , yi , zi = util.random_shifter(rint)
            temp_randoms = self.randoms.copy()
            temp_randoms[:,0] += xi
            temp_randoms[:,1] += yi
            temp_randoms[:,2] += zi

            obvs = []
            for obv in observables:
                if obv == 'nbar':
                    obvs.append(len(pos) / 200**3.)       # nbar of the galaxy catalog
                elif obv == 'gmf':
                    #compute group richness 
                    nbar = len(pos) / 200**3.
    		    b = self.b_normal * (nbar)**(-1./3) 
    		    groups = pyfof.friends_of_friends(pos , b)
    		    w = np.array([len(x) for x in groups])
    		    gbins = data_gmf_bins()
    		    gmf = np.histogram(w , gbins)[0] / (200.**3.)
                    obvs.append(gmf)   
                elif obv == 'xi':
                    greek_xi = tpcf(
                        pos, rbins, pos, 
                        randoms=temp_randoms, period = period, 
                        max_sample_size=int(1e5), estimator='Natural', 
                        approx_cell1_size=approx_cell1_size, 
                        approx_cellran_size=approx_cellran_size,
                        RR_precomputed = self.RR,
	                NR_precomputed = self.NR)

                    obvs.append(greek_xi)
                else:
                    raise NotImplementedError('Only nbar 2pcf, gmf implemented so far')

            return obvs

        else:
            if np.all((prior_range[:,0] < theta) & (theta < prior_range[:,1])):
                # if all theta_i is within prior range ...
                try:
                    rint = np.random.randint(1, 125)
                    simsubvol = lambda x: util.mask_func(x, rint)
                    self.model.populate_mock(self.halocat,
                                masking_function=simsubvol,
                                enforce_PBC=False)
           
                    pos =three_dim_pos_bundle(self.model.mock.galaxy_table, 'x', 'y', 'z')
                    #imposing mask on the galaxy table
                    pos = util.mask_galaxy_table(pos , rint) 
            	    xi , yi , zi = util.random_shifter(rint)
            	    temp_randoms = self.randoms.copy()
            	    temp_randoms[:,0] += xi
            	    temp_randoms[:,1] += yi
            	    temp_randoms[:,2] += zi
            	    obvs = []

            	    for obv in observables:
                        if obv == 'nbar':
                    	    obvs.append(len(pos) / 200**3.)       # nbar of the galaxy catalog
                        elif obv == 'gmf':
                            nbar = len(pos) / 200**3.
    		            b = self.b_normal * (nbar)**(-1./3) 
    		            groups = pyfof.friends_of_friends(pos , b)
    		            w = np.array([len(x) for x in groups])
    		    	    gbins =data_gmf_bins()
    		            gmf = np.histogram(w , gbins)[0] / (200.**3.)
                    	    obvs.append(gmf)   
                        elif obv == 'xi':
                    	    greek_xi = tpcf(
                                     pos, rbins, pos, 
                        	     randoms=temp_randoms, period = period, 
                                     max_sample_size=int(1e5), estimator='Natural', 
                                     approx_cell1_size=approx_cell1_size, 
                                     approx_cellran_size=approx_cellran_size,
                                     RR_precomputed = self.RR,
	                             NR_precomputed = self.NR)

                    	    obvs.append(greek_xi)
                        else:
                            raise NotImplementedError('Only nbar, tpcf, and gmf are implemented so far')

                    return obvs

                except ValueError:

                    obvs = []
                    for obv in observables:
                        if obv == 'nbar':
                            obvs.append(10.)
                        elif obv == 'gmf':
                            bins = data_gmf_bins()
                            obvs.append(np.ones_like(bins)[:-1]*1000.)
                        elif obv == 'xi':
                            obvs.append(np.zeros(len(xi_binedges()[:-1])))
                    return obvs
            else:
                obvs = []
                for obv in observables:
                    if obv == 'nbar':
                        obvs.append(10.)
                    elif obv == 'gmf':
                        bins = data_gmf_bins()
                        obvs.append(np.ones_like(bins)[:-1]*1000.)
                    elif obv == 'xi':
                        obvs.append(np.zeros(len(xi_binedges()[:-1])))
                return obvs
Beispiel #9
0
def Subvolume_FullvolumeCut(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 
    2PCF for the entire MultiDark volume WITHOUT periodic boundary conditions
    and actual pair counts, CUT into subvolumes of the same size *AFTER*
    populate mock 


    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample

    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_fullvolume_cut_test',
        '.Nsub',
        str(N_sub), '.p'
    ])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()

    # Entire MultiDark Volume (No Periodic Boundary Conditions)
    model = PrebuiltHodModelFactory('zheng07', threshold=-21)
    halocat = CachedHaloCatalog(simname='multidark',
                                redshift=0,
                                halo_finder='rockstar')

    sub_RR = data_RR(box='md_sub')
    sub_randoms = data_random(box='md_sub')
    sub_NR = len(sub_randoms)

    rmax = xi_bin.max()
    full_approx_cell1_size = [rmax, rmax, rmax]
    full_approx_cellran_size = [rmax, rmax, rmax]

    model.populate_mock(halocat, enforce_PBC=False)
    subvol_id = util.mk_id_column(table=model.mock.galaxy_table)
    full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

    # Full Volume
    if os.path.isfile(pickle_file):
        data_dump = pickle.load(open(pickle_file, 'rb'))
        full_xi = data_dump['full_xi']
    else:
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark',
                                    redshift=0,
                                    halo_finder='rockstar')

        full_randoms = data_random(box='md_all')
        full_RR = data_RR(box='md_all')
        full_NR = len(full_randoms)

        rmax = xi_bin.max()
        full_approx_cell1_size = [rmax, rmax, rmax]
        full_approx_cellran_size = [rmax, rmax, rmax]

        model.populate_mock(halocat, enforce_PBC=False)
        full_pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        full_xi = tpcf(full_pos,
                       xi_bin,
                       randoms=full_randoms,
                       period=None,
                       do_auto=True,
                       do_cross=False,
                       num_threads=5,
                       max_sample_size=int(full_pos.shape[0]),
                       estimator='Natural',
                       approx_cell1_size=full_approx_cell1_size,
                       approx_cellran_size=full_approx_cellran_size,
                       RR_precomputed=full_RR,
                       NR_precomputed=full_NR)
        data_dump = {}
        data_dump['full_xi'] = full_xi

    if not ratio:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 full_xi,
                 lw=2,
                 ls='-',
                 c='k',
                 label=r'Full Volume')

    if os.path.isfile(pickle_file):
        fullcut_xi_list = data_dump['fullcut_xi']['fullcut_xi_list']
        fullcut_xi_avg = data_dump['fullcut_xi']['fullcut_xi_avg']
    else:
        data_dump['fullcut_xi'] = {}
        fullcut_xi_list = []
        fullcut_xi_tot = np.zeros(len(xi_bin) - 1)
        for id in np.unique(subvol_id)[:N_sub]:
            print 'Subvolume ', id
            in_cut = np.where(subvol_id == id)

            fullcut_pos = full_pos[in_cut]

            fullcut_xi = tpcf(fullcut_pos,
                              xi_bin,
                              randoms=sub_randoms,
                              period=None,
                              do_auto=True,
                              do_cross=False,
                              num_threads=5,
                              max_sample_size=int(fullcut_pos.shape[0]),
                              estimator='Natural',
                              approx_cell1_size=full_approx_cell1_size,
                              approx_cellran_size=full_approx_cellran_size,
                              RR_precomputed=sub_RR,
                              NR_precomputed=sub_NR)

            fullcut_xi_list.append(fullcut_xi)
            fullcut_xi_tot += fullcut_xi

        fullcut_xi_avg = fullcut_xi_tot / np.float(N_sub)
        data_dump['fullcut_xi']['fullcut_xi_list'] = fullcut_xi_list
        data_dump['fullcut_xi']['fullcut_xi_avg'] = fullcut_xi_avg

    if not ratio:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 fullcut_xi_avg,
                 lw=2,
                 ls='-',
                 c='k',
                 label=r'Full Volume Cut Average')
    else:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 fullcut_xi_avg / full_xi,
                 lw=2,
                 ls='-',
                 c='k',
                 label=r'Full Volume Cut Average')

    if not os.path.isfile(pickle_file):
        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark',
                                        redshift=0,
                                        halo_finder='rockstar')
        sub_RR = data_RR(box='md_sub')
        sub_randoms = data_random(box='md_sub')
        sub_NR = len(sub_randoms)

        sub_xis_list = []
        sub_xis = np.zeros(len(full_xi))

        for ii in range(1, N_sub):
            print 'Subvolume ', ii
            # randomly sample one of the subvolumes
            rint = ii  #np.random.randint(1, 125)
            simsubvol = lambda x: util.mask_func(x, rint)
            sub_model.populate_mock(sub_halocat,
                                    masking_function=simsubvol,
                                    enforce_PBC=False)

            pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x', 'y',
                                       'z')

            xi, yi, zi = util.random_shifter(rint)
            temp_randoms = sub_randoms.copy()
            temp_randoms[:, 0] += xi
            temp_randoms[:, 1] += yi
            temp_randoms[:, 2] += zi

            rmax = xi_bin.max()
            sub_approx_cell1_size = [rmax, rmax, rmax]
            sub_approx_cellran_size = [rmax, rmax, rmax]

            sub_xi = tpcf(pos,
                          xi_bin,
                          randoms=temp_randoms,
                          period=None,
                          do_auto=True,
                          do_cross=False,
                          num_threads=5,
                          max_sample_size=int(pos.shape[0]),
                          estimator='Natural',
                          approx_cell1_size=sub_approx_cell1_size,
                          approx_cellran_size=sub_approx_cellran_size,
                          RR_precomputed=sub_RR,
                          NR_precomputed=sub_NR)

            label = None
            if ii == N_sub - 1:
                label = 'Subvolumes'

            sub_xis += sub_xi
            sub_xis_list.append(sub_xi)

        sub_xi_avg = sub_xis / np.float(N_sub)

        data_dump['Natural'] = {}
        data_dump['Natural']['sub_xi_avg'] = sub_xi_avg
        data_dump['Natural']['sub_xis_list'] = sub_xis_list
    else:
        sub_xis_list = data_dump['Natural']['sub_xis_list']
        sub_xi_avg = data_dump['Natural']['sub_xi_avg']

    if not os.path.isfile(pickle_file):
        pickle.dump(data_dump, open(pickle_file, 'wb'))

    if not ratio:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 sub_xi_avg,
                 lw=2,
                 ls='--',
                 c=pretty_colors[3],
                 label='Subvolume')
    else:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 sub_xi_avg / full_xi,
                 lw=2,
                 ls='--',
                 c=pretty_colors[3],
                 label='Subvolume')

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub',
            str(N_sub), '.ratio.png'
        ])
    else:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_fullvolume_cut.Nsub',
            str(N_sub), '.png'
        ])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None
Beispiel #10
0
def Subvolume_Analytic(N_sub, ratio=False):
    ''' Test the 2PCF estimates from MultiDark subvolume versus the 
    analytic 2PCF for the entire MultiDark volume

    Parameters
    ----------
    N_sub : (int)
        Number of subvolumes to sample

    '''
    prettyplot()
    pretty_colors = prettycolors()

    pickle_file = ''.join([
        '/export/bbq2/hahn/ccppabc/dump/', 'xi_subvolume_test', '.Nsub',
        str(N_sub), '.p'
    ])

    fig = plt.figure(1)
    sub = fig.add_subplot(111)

    xi_bin = xi_binedges()

    if os.path.isfile(pickle_file):
        data_dump = pickle.load(open(pickle_file, 'rb'))
        full_xi = data_dump['full_xi']
    else:
        # Entire MultiDark Volume (Analytic xi)
        model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        halocat = CachedHaloCatalog(simname='multidark',
                                    redshift=0,
                                    halo_finder='rockstar')

        model.populate_mock(halocat)
        pos = three_dim_pos_bundle(model.mock.galaxy_table, 'x', 'y', 'z')

        # while the estimator claims to be Landy-Szalay, I highly suspect it
        # actually uses Landy-Szalay since DR pairs cannot be calculated from
        # analytic randoms
        full_xi = tpcf(pos,
                       xi_bin,
                       period=model.mock.Lbox,
                       max_sample_size=int(2e5),
                       estimator='Landy-Szalay',
                       num_threads=1)
        data_dump = {}
        data_dump['full_xi'] = full_xi

    if not ratio:
        sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                 full_xi,
                 lw=2,
                 ls='-',
                 c='k',
                 label=r'Analytic $\xi$ Entire Volume')

    if not os.path.isfile(pickle_file):
        # MultiDark SubVolume (precomputed RR pairs)
        sub_model = PrebuiltHodModelFactory('zheng07', threshold=-21)
        sub_model.new_haloprop_func_dict = {'sim_subvol': util.mk_id_column}
        sub_halocat = CachedHaloCatalog(simname='multidark',
                                        redshift=0,
                                        halo_finder='rockstar')
        RR = data_RR()
        randoms = data_random()
        NR = len(randoms)

    for method in ['Landy-Szalay', 'Natural']:

        if method == 'Landy-Szalay':
            iii = 3
        elif method == 'Natural':
            iii = 5

        if not os.path.isfile(pickle_file):
            sub_xis_list = []
            sub_xis = np.zeros(len(full_xi))

            for ii in range(1, N_sub + 1):
                # randomly sample one of the subvolumes
                rint = ii  #np.random.randint(1, 125)
                simsubvol = lambda x: util.mask_func(x, rint)
                sub_model.populate_mock(sub_halocat,
                                        masking_function=simsubvol,
                                        enforce_PBC=False)

                pos = three_dim_pos_bundle(sub_model.mock.galaxy_table, 'x',
                                           'y', 'z')

                xi, yi, zi = util.random_shifter(rint)
                temp_randoms = randoms.copy()
                temp_randoms[:, 0] += xi
                temp_randoms[:, 1] += yi
                temp_randoms[:, 2] += zi

                rmax = xi_bin.max()
                approx_cell1_size = [rmax, rmax, rmax]
                approx_cellran_size = [rmax, rmax, rmax]

                sub_xi = tpcf(pos,
                              xi_bin,
                              pos,
                              randoms=temp_randoms,
                              period=None,
                              max_sample_size=int(1e5),
                              estimator=method,
                              approx_cell1_size=approx_cell1_size,
                              approx_cellran_size=approx_cellran_size,
                              RR_precomputed=RR,
                              NR_precomputed=NR)
                label = None
                if ii == N_sub - 1:
                    label = 'Subvolumes'

                #if not ratio:
                #    sub.plot(0.5*(xi_bin[:-1]+xi_bin[1:]), sub_xi, lw=0.5, ls='--', c=pretty_colors[iii])
                sub_xis += sub_xi
                sub_xis_list.append(sub_xi)

            sub_xi_avg = sub_xis / np.float(N_sub)

            data_dump[method] = {}
            data_dump[method]['sub_xi_avg'] = sub_xi_avg
            data_dump[method]['sub_xis_list'] = sub_xis_list
        else:
            sub_xis_list = data_dump[method]['sub_xis_list']
            sub_xi_avg = data_dump[method]['sub_xi_avg']

        if not ratio:
            sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                     sub_xi_avg,
                     lw=2,
                     ls='--',
                     c=pretty_colors[iii],
                     label='Subvolume ' + method)
        else:
            sub.plot(0.5 * (xi_bin[:-1] + xi_bin[1:]),
                     sub_xi_avg / full_xi,
                     lw=2,
                     ls='--',
                     c=pretty_colors[iii],
                     label='Subvolume ' + method)

    if not os.path.isfile(pickle_file):
        pickle.dump(data_dump, open(pickle_file, 'wb'))

    sub.set_xlim([0.1, 50.])
    sub.set_xlabel('r', fontsize=30)
    sub.set_xscale('log')

    if not ratio:
        sub.set_ylabel(r"$\xi \mathtt{(r)}$", fontsize=25)
        sub.set_yscale('log')
    else:
        sub.set_ylabel(r"$\overline{\xi^\mathtt{sub}}/\xi^\mathtt{all}$",
                       fontsize=25)

    sub.legend(loc='lower left')

    if ratio:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_analytic.Nsub',
            str(N_sub), '.ratio.png'
        ])
    else:
        fig_file = ''.join([
            util.fig_dir(), 'test_xi_subvolume_analytic.Nsub',
            str(N_sub), '.png'
        ])
    fig.savefig(fig_file, bbox_inches='tight', dpi=100)
    plt.close()
    return None