Example #1
from argparse import ArgumentParser

import numpy
from nbodykit.lab import BigFileCatalog, FOF, KDDensity
from nbodykit.cosmology import Planck15

def main():
    ap = ArgumentParser()
    ap.add_argument('fpm', help='e.g. /scratch/fpm_0.1000/')
    ap.add_argument('ll', type=float, help='e.g. 0.2 or 0.168') 
    ap.add_argument('--with-peak', dest='with_peak', action='store_true', default=True,
        help='Find peaks via KDDensity estimation (slow); enabled by default')
    ap.add_argument('--no-peak', dest='with_peak', action='store_false',
        help='Skip the KDDensity peak estimation')
    ap.add_argument('fof', help='e.g. /scratch/fpm_0.1000/fof . Will write to {fof}/{ll}')
    ap.add_argument('--nmin', type=int, default=20, help='min number of particles to be in the catalogue')

    ns = ap.parse_args()

    cat = BigFileCatalog(ns.fpm, header='Header', dataset='1/')

    cat.attrs['BoxSize'] = numpy.ones(3) * cat.attrs['BoxSize'][0]
    cat.attrs['Nmesh'] = numpy.ones(3) * cat.attrs['NC'][0]

    cosmo = Planck15.match(Omega0_m=cat.attrs['OmegaM'])

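    # Particle mass in Msun/h: M0 = Omega_m * rho_crit * V / N_particles, with
    # rho_crit = 27.75e10 (Msun/h) / (Mpc/h)^3 and N_particles = cat.csize.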
    M0 = cat.attrs['OmegaM'][0] * 27.75 * 1e10 * cat.attrs['BoxSize'].prod() / cat.csize

    if cat.comm.rank == 0:
        print('BoxSize', cat.attrs['BoxSize'])
        print('Nmesh', cat.attrs['Nmesh'])
        print('Mass of a particle', M0)
        print('OmegaM', cosmo.Om0)


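    # KDDensity computes a proxy density from nearest-neighbour distances
    # (arbitrary normalisation); it is only used below to locate the density
    # peak of each FOF group.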
    if ns.with_peak:
        cat['Density'] = KDDensity(cat).density

    fof = FOF(cat, linking_length=ns.ll, nmin=ns.nmin)

    if ns.with_peak:
        features = fof.find_features(peakcolumn='Density')
    else:
        features = fof.find_features(peakcolumn=None)

    features['Mass'] = M0 * features['Length']
    if fof.comm.rank == 0:
        print('Total number of features found', features.csize)
        print('Saving columns', features.columns)

    features.save(ns.fof + '/%0.3f' % ns.ll, features.columns)
Example #2
def main(argv):
    # FLAGS, comm, rank, start, run_wl_sim and save_2Dmap are assumed to be
    # defined at module level in the original script (absl flags + mpi4py).
    del argv
    """ -------------- setting parameters ------------------------"""
    params = FLAGS.flag_values_dict()
    params['Nmesh'] = [FLAGS.Nmesh] * 3
    params['BoxSize'] = [FLAGS.boxsize] * 3
    params['Nmesh2D'] = [FLAGS.Nmesh2D] * 2
    params['BoxSize2D'] = [FLAGS.boxsize2D] * 2
    params['zs_source'] = [float(zs) for zs in FLAGS.zs_source]

    if params['custom_cosmo']:
        cosmo = Planck15.match(Omega0_m=FLAGS.Omega_m)
        cosmo = cosmo.match(sigma8=FLAGS.sigma_8)
    else:
        if rank == 0:
            print('custom_cosmo is set to False. Using default cosmology.')
        cosmo = Planck15

    if params['save3D'] or params['save3Dpower']:
        if params['interpolate']:
            raise ValueError(
                'interpolate must be set to False if requesting 3D outputs')
    """------- setting output dirs and saving parameters-----------"""
    dirs = {}
    if rank == 0:
        cmd = "git log --pretty=format:'%h' -n 1"
        githash = subprocess.run(cmd, stdout=subprocess.PIPE,
                                 shell=True).stdout.decode('utf-8')
        print('dumping under githash %s' % githash)
        output_path = os.path.join(FLAGS.output_path, githash)
        params_path = os.path.join(os.path.join(os.getcwd()), 'runs', githash)
        params['output_path'] = output_path
        print(params_path)
        if not os.path.isdir(params_path):
            os.makedirs(params_path)

        # make sure parameter file name is unique and we are not repeating a run
        num_run = 0
        found = True
        while found:
            path_name = os.path.join(output_path,
                                     params['label'] + '%d/' % num_run)
            params_file = os.path.join(params_path,
                                       params['label'] + '%d.json' % num_run)
            if not os.path.isdir(path_name):
                os.makedirs(path_name)
                found = False
            if not os.path.isfile(params_file):
                found = False
            else:
                with open(params_file, 'r') as f:
                    old_params = json.load(f)
                    if old_params == params and not params['debug']:
                        raise ValueError(
                            'run with same settings already exists: %s' %
                            params_file)
                    elif params['debug']:
                        found = False
                    else:
                        num_run += 1

        for result in ['cls', 'maps', 'snapshots']:
            dirs[result] = os.path.join(path_name, result)
            if not os.path.isdir(dirs[result]):
                os.makedirs(dirs[result])

        with open(params_file, 'w') as f:
            json.dump(params, f)

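    # Output directories were created on rank 0 only; broadcast the paths so
    # every rank can write into them.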
    dirs = comm.bcast(dirs, root=0)
    params['snapshot_dir'] = dirs['snapshots']
    """---------------------------run actual simulations-----------------------------"""
    sims_start = time.time()

    for ii in range(FLAGS.N_maps):
        if rank == 0:
            print('progress in percent:', ii / params['N_maps'] * 100)
        kmaps, kmaps_deriv, pm = run_wl_sim(params, cosmo=cosmo, num=ii)

        for jj, z_source in enumerate(params['zs_source']):
            kmap = kmaps[jj]
            mapfile = os.path.join(
                dirs['maps'], 'map_decon_zsource%d_map%d_of%d' %
                (z_source * 10, ii, params['N_maps']) + '.npy')
            save_2Dmap(kmap, mapfile)
            if rank == 0:
                print('2D map #%d at z_s=%.1f dumped to %s' %
                      (ii, z_source, mapfile))

    end = time.time()
    if rank == 0:
        print('time taken per map in sec %d' %
              ((end - sims_start) /
               (params['N_maps'] * len(params['zs_source']))))
        print('time taken before sims in sec %d' % (sims_start - start))
Example #3
import os
from argparse import ArgumentParser

import numpy as np
from nbodykit.lab import BigFileCatalog, FOF
from nbodykit.cosmology import Planck15

def main():
    """ 
    Script to compute FOF halos from treepm DM catalog. Compute virial mass
    which is needed for HOD models. 

    Note: Before March 2020, used mass given by number of particles in halo,
    see psiRec/psirec/main_ms_gadget_fof_halofinder_nbkit0.3.py.
    """

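    # Example invocation (script name is illustrative; paths follow the help strings):
    #   python fof_treepm.py /scratch/treepm_0.1000/ 0.2 /scratch/treepm_0.1000/fof --nmin 20
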
    ap = ArgumentParser()
    ap.add_argument('treepm', 
        help='Directory of TreePM matter field, e.g. /scratch/treepm_0.1000/')
    ap.add_argument('ll', type=float, 
        help='Linking length of finding halos, e.g. 0.2 or 0.168', 
        default=0.2)
    ap.add_argument('fof', 
        help=('Output directory of halo catalogs, e.g. '
              '/scratch/treepm_0.1000/fof . Will write to {fof}/{ll_nmin_mvir}'))
    ap.add_argument('--nmin', type=int, default=20, 
        help='min number of particles to be in the catalogue')
    ap.add_argument('--with-peak', action='store_true',
        help='Find peaks via KDDensity estimation (slow)')


    ns = ap.parse_args()

    cat = BigFileCatalog(ns.treepm, header='Header', dataset='1/')



    cat.attrs['BoxSize'] = np.ones(3) * cat.attrs['BoxSize'][0]
    cat.attrs['Nmesh'] = np.ones(3) * 512.0    # in TreePM catalog, there is no 'NC' attribute
    
    cosmo = Planck15.match(Omega0_m=cat.attrs['Omega0'])
    # In TreePM, we need to use 'Omega0' instead of 'OmegaM' in FastPM.
    # csize is the total number of particles
    M0 = (cat.attrs['Omega0'][0] * 27.75 * 1e10 * cat.attrs['BoxSize'].prod() 
            / cat.csize)

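    # attrs['Time'] stores the scale factor a of the snapshot, so z = 1/a - 1.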
    redshift = 1.0/cat.attrs['Time'][0]-1.0

    if cat.comm.rank == 0:
        print('BoxSize', cat.attrs['BoxSize'])
        print('Mass of a particle', M0)
        print('OmegaM', cosmo.Om0)
        print('attrs', cat.attrs.keys())
        print('Redshift', redshift)


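    # posdef selects the halo position definition used by fof.to_halos below:
    # 'peak' places each halo at its densest particle, 'cm' at the group's
    # center of mass.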
    if ns.with_peak:
        posdef = 'peak'
    else:
        posdef = 'cm'

    # Halos which have more than nmin particles are selected.
    fof = FOF(cat, linking_length=ns.ll, nmin=ns.nmin)  

    # Compute halo catalog. Mass column contains virial mass, which is needed
    # to get concentration needed for hod.
    halos = fof.to_halos(
        cosmo=cosmo,
        redshift=redshift,
        particle_mass=M0,
        mdef='vir',
        posdef=posdef,
        peakcolumn='Density')

    halos['log10M'] = np.log10(halos['Mass'])

    # print info
    if fof.comm.rank == 0:
        print('Total number of halos found', halos.csize)
        print('Saving columns', halos.columns)
        if not os.path.exists(ns.fof):
            os.makedirs(ns.fof)

    # Save the halo catalog to disk so can easily load it later to populate
    # galaxies with hod.
    out_fname = ns.fof + '/ll_{0:.3f}_nmin{1}_mvir'.format(ns.ll, ns.nmin+1)

    if ns.with_peak:
        out_fname += '_peakpos'

    # MS: Somehow crashes b/c some ranks don't see header file. running
    # a second time works though. maybe write header first with 
    # single rank?
    halos.save(out_fname, halos.columns)

    if fof.comm.rank == 0:
        print('Saved HaloCatalog to %s' % out_fname)
Example #4
import numpy as np
from nbodykit.lab import HaloCatalog
from nbodykit.cosmology import Planck15
from nbodykit.hod import Zheng07Model

def run_hod(cat,
            HOD_model_name=None,
            hod_seed=42,
            add_RSD=False,
            RSD_LOS=None):
    """
    Run HOD to get galaxy catalog from input halo catalog.

    Parameters
    ----------
    cat : nbodykit Catalog object
        Input halo catalog, should use virial mass as 'Mass' column.
    HOD_model_name : str
        Name of the HOD variant to apply (see options below).
    hod_seed : int
        Random seed passed to halos.populate.
    add_RSD : bool
        If True, shift positions by VelocityOffset along RSD_LOS (np.ndarray, shape (3,)).
    """
    if cat.comm.rank == 0:
        print('HOD model: %s' % HOD_model_name)
    cat.attrs['BoxSize'] = np.ones(3) * cat.attrs['BoxSize'][0]
    # in TreePM catalog, there is no 'NC' attribute
    cat.attrs['Nmesh'] = np.ones(3) * 512.0

    cosmo = Planck15.match(Omega0_m=cat.attrs['Omega0'])
    # In TreePM, we need to use 'Omega0' instead of 'OmegaM' in FastPM.
    # csize is the total number of particles
    M0 = (cat.attrs['Omega0'][0] * 27.75 * 1e10 * cat.attrs['BoxSize'].prod() /
          cat.csize)
    redshift = 1.0 / cat.attrs['Time'][0] - 1.0

    # convert to HaloCatalog
    halos = HaloCatalog(cat, cosmo, redshift)
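    # HaloCatalog attaches cosmology, redshift and a mass definition to the
    # catalog so that halotools-based HOD models can be applied via .populate().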

    if cat.comm.rank == 0:
        print('BoxSize', halos.attrs['BoxSize'])
        print('attrs', halos.attrs.keys())
        print('RSDFactor', halos.attrs['RSDFactor'])
        print('Columns', halos.columns)

    # Define HOD
    if HOD_model_name in [
            'Zheng07_HandSeljak17_v2', 'Zheng07_HandSeljak17_centrals_v2',
            'Zheng07_HandSeljak17_sats_v2',
            'Zheng07_HandSeljak17_parent_halos_v2'
    ]:

        # (1) Hand & Seljak 1706.02362:
        # Uses {log10 Mmin, sigma log10 M, log10 M1, alpha, log10 Mcut} = {12.99, 0.308, 14.08, 0.824, 13.20}.
        # See Reid et al https://arxiv.org/pdf/1404.3742.pdf eq 17-19

        # (2) halotools docs on zheng07 model:
        #  See https://halotools.readthedocs.io/en/stable/quickstart_and_tutorials/tutorials/model_building/preloaded_models/zheng07_composite_model.html#zheng07-parameters):
        # logMmin - Minimum mass required for a halo to host a central galaxy.
        # sigma_logM - Rate of transition from <Ncen>=0 -> <Ncen>=1.
        # alpha - Power law slope of the relation between halo mass and <Nsat>.
        # logM0 - Low-mass cutoff in <Nsat>.
        # logM1 - Characteristic halo mass where <Nsat> begins to assume a power law form.

        # 11 June 2020: Zheng07_HandSeljak17_v2 uses fixed RSDFactor, which was wrong by factor of 1/a before.
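
        # For reference, the Zheng07 mean occupations (Zheng et al. 2007) are
        #   <Ncen(M)> = 0.5 * (1 + erf((log10 M - logMmin) / sigma_logM))
        #   <Nsat(M)> = <Ncen(M)> * ((M - M0) / M1)**alpha   for M > M0, else 0.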

        hodmodel = Zheng07Model.to_halotools(cosmo=cosmo,
                                             redshift=redshift,
                                             mdef='vir')

        # HOD parameters from Hand & Seljak 1706.02362
        hodmodel.param_dict['logMmin'] = 12.99
        hodmodel.param_dict['sigma_logM'] = 0.308
        hodmodel.param_dict['logM1'] = 14.08
        hodmodel.param_dict['alpha'] = 1.06
        # logM0 is called Mcut in Hand et al and Reid et al.
        hodmodel.param_dict['logM0'] = 13.20

        if cat.comm.rank == 0:
            print('Use zheng07model with:', hodmodel.param_dict)

        # Run HOD
        galcat = halos.populate(hodmodel, seed=hod_seed)

        # select which galaxies to keep
        if HOD_model_name == 'Zheng07_HandSeljak17_v2':
            # keep all
            pass

        elif HOD_model_name == 'Zheng07_HandSeljak17_centrals_v2':
            # select only centrals
            ww = galcat['gal_type'] == 0  # 0: central, 1: satellite
            galcat = galcat[ww]

        elif HOD_model_name == 'Zheng07_HandSeljak17_sats_v2':
            # select only satellites
            ww = galcat['gal_type'] == 1  # 0: central, 1: satellite
            galcat = galcat[ww]

        elif HOD_model_name == 'Zheng07_HandSeljak17_parent_halos_v2':
            # select centrals
            ww = galcat['gal_type'] == 0  # 0: central, 1: satellite
            galcat = galcat[ww]

            # set position to that of parent halo (in Mpc/h)
            halo_pos = galcat['Position'].compute() + np.nan
            halo_pos[:, 0] = galcat['halo_x'].compute()
            halo_pos[:, 1] = galcat['halo_y'].compute()
            halo_pos[:, 2] = galcat['halo_z'].compute()
            galcat['Position'] = halo_pos
            del halo_pos

            # set velocity to that of parent halo (in km/s)
            halo_vel = galcat['Velocity'].compute() + np.nan
            halo_vel[:, 0] = galcat['halo_vx'].compute()
            halo_vel[:, 1] = galcat['halo_vy'].compute()
            halo_vel[:, 2] = galcat['halo_vz'].compute()
            galcat['Velocity'] = halo_vel
            del halo_vel

            # Get RSD displacement = v_z/(aH(a)), where v_z is halo velocity.
            # Compute rsd_factor = 1/(aH(a)) = (1+z)/H(z)
            # see https://nbodykit.readthedocs.io/en/latest/catalogs/common-operations.html#Adding-Redshift-space-Distortions
            rsd_factor = (1. + redshift) / (100. * cosmo.efunc(redshift))
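            # cosmo.efunc(z) = H(z)/H0 with H0 = 100 h km/s/Mpc, so rsd_factor
            # converts a velocity in km/s into a comoving displacement in Mpc/h.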
            raise Exception(
                'this is not correct for ms_gadget which has a^2 dx/dt for velocity.'
            )
            galcat['VelocityOffset'] = rsd_factor * galcat['Velocity']

            # columns: ['Position', 'Selection', 'Value', 'Velocity', 'VelocityOffset', 'Weight', 'conc_NFWmodel', 'gal_type', 'halo_hostid', 'halo_id', 'halo_mvir', 'halo_num_centrals', 'halo_num_satellites', 'halo_rvir', 'halo_upid', 'halo_vx', 'halo_vy', 'halo_vz', 'halo_x', 'halo_y', 'halo_z', 'host_centric_distance', 'vx', 'vy', 'vz', 'x', 'y', 'z']

    else:
        raise Exception('Unknown hod_model %s' % HOD_model_name)

    if add_RSD:
        assert isinstance(RSD_LOS, np.ndarray)
        assert RSD_LOS.shape == (3, )
        print('cat attrs:', galcat.attrs)

        # It seems like halos.populate draws satellite velocities in km/s from the
        # NFW profile and sets central velocities equal to the halo velocity.
        # It is not clear what units are assumed for the halo velocity; note that the
        # velocity prefactor (factor of a) differs between ms_gadget and the new
        # MP-Gadget format.
        # Also, the centrals should probably use the halo peak velocity rather than
        # the bulk velocity of the halo.
        # So the HOD velocities currently look unreliable.
        raise Exception(
            'todo: use RSDFactor of the catalog! VelocityOffset can be wrong by factor of a if catalog has a^2 dx/dt (ms_gadget) instead of a dx/dt.'
        )

        galcat['Position'] = (galcat['Position'] +
                              galcat['VelocityOffset'] * RSD_LOS)

    if cat.comm.rank == 0:
        print('galcat', galcat)
        print('attrs', galcat.attrs)
        print('columns', galcat.columns)
        print('fsat', galcat.attrs['fsat'])

    return galcat
Example #5
    def __init__(self, githash, label, rnum, local_path, alter_path=None):
        """
        Loads the parameter file of the run.
        githash   : string, abridged githash of the commit under which the run was performed
        label     : string, label of the run
        rnum      : int, number of the run under this label and githash
        local_path: string, path under which parameter files have been stored
        alter_path: string, optional alternative base path for the run outputs
                    (defaults to the output_path stored in the parameter file)
        """

        #-------------------------------------------------------------#
        params_path = os.path.join(local_path, 'runs', githash)
        params_file = os.path.join(params_path, label + '%d.json' % rnum)
        with open(params_file, 'r') as f:
            self.params = json.load(f)
        if alter_path is None:
            path_name = os.path.join(self.params['output_path'],
                                     self.params['label'] + '%d/' % rnum)
        else:
            path_name = os.path.join(alter_path,
                                     self.params['label'] + '%d/' % rnum)

        self.dirs = {}
        for result in ['cls', 'maps', 'snapshots']:
            self.dirs[result] = os.path.join(path_name, result)
        #-------------------------------------------------------------#

        cosmo = Planck15.match(Omega0_m=self.params['Omega_m'])
        self.cosmo = cosmo.match(sigma8=self.params['sigma_8'])

        self.pm = ParticleMesh(Nmesh=self.params['Nmesh'],
                               BoxSize=self.params['BoxSize'],
                               resampler='cic')

        BoxSize2D = [deg / 180. * np.pi for deg in self.params['BoxSize2D']]
        self.pm2D = ParticleMesh(BoxSize=BoxSize2D,
                                 Nmesh=self.params['Nmesh2D'],
                                 resampler='cic')

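        # Build the inverse mapping z(chi): sample chi(z) on a fine grid and
        # interpolate back, so a redshift can be assigned to any comoving
        # distance (in Mpc/h) up to z ~ 1500.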
        z_int = np.logspace(-8, np.log10(1500), 10000)
        chis = cosmo.comoving_distance(z_int)  #Mpc/h
        self.z_chi_int = scipy.interpolate.interp1d(chis,
                                                    z_int,
                                                    kind=3,
                                                    bounds_error=False,
                                                    fill_value=0.)

        self.theory_cls = {}
        self.measured_cls = {}
        print(
            'Loading run with BoxSize %d, Nmesh %d, SourceRedshift %.2f, PGD %s and interpolation %s.'
            % (self.params['BoxSize'][0], self.params['Nmesh'][0],
               self.params['zs_source'][0], str(
                   self.params['PGD']), str(self.params['interpolate'])))

        # count how many maps have been dumped
        NN = len(os.listdir(self.dirs['maps']))
        if NN < self.params['N_maps']:
            print(
                'fewer maps were produced than requested. Requested: %d Produced: %d'
                % (self.params['N_maps'], NN))
        self.N_maps = NN
        print('%d maps were produced in this run' % self.N_maps)

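        # Nyquist wavenumbers of the meshes, k_Nyq = pi * Nmesh / BoxSize
        # (h/Mpc for the 3D mesh; inverse radians, i.e. multipole-like, for the
        # flat-sky 2D mesh).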
        self.Nyquist_3D = np.pi * self.pm.Nmesh[0] / self.pm.BoxSize[0]
        self.Nyquist_2D = np.pi * self.pm2D.Nmesh[0] / self.pm2D.BoxSize[0]